diff --git "a/tokenizer.json" "b/tokenizer.json" new file mode 100644--- /dev/null +++ "b/tokenizer.json" @@ -0,0 +1,24541 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "": 0, + "": 1, + "": 2, + "": 3, + "": 4, + "!": 5, + "\"": 6, + "#": 7, + "$": 8, + "%": 9, + "&": 10, + "'": 11, + "(": 12, + ")": 13, + "*": 14, + "+": 15, + ",": 16, + "-": 17, + ".": 18, + "/": 19, + "0": 20, + "1": 21, + "2": 22, + "3": 23, + "4": 24, + "5": 25, + "6": 26, + "7": 27, + "8": 28, + "9": 29, + ":": 30, + ";": 31, + "<": 32, + "=": 33, + ">": 34, + "?": 35, + "@": 36, + "A": 37, + "B": 38, + "C": 39, + "D": 40, + "E": 41, + "F": 42, + "G": 43, + "H": 44, + "I": 45, + "J": 46, + "K": 47, + "L": 48, + "M": 49, + "N": 50, + "O": 51, + "P": 52, + "Q": 53, + "R": 54, + "S": 55, + "T": 56, + "U": 57, + "V": 58, + "W": 59, + "X": 60, + "Y": 61, + "Z": 62, + "[": 63, + "\\": 64, + "]": 65, + "^": 66, + "_": 67, + "`": 68, + "a": 69, + "b": 70, + "c": 71, + "d": 72, + "e": 73, + "f": 74, + "g": 75, + "h": 76, + "i": 77, + "j": 78, + "k": 79, + "l": 80, + "m": 81, + "n": 82, + "o": 83, + "p": 84, + "q": 85, + "r": 86, + "s": 87, + "t": 88, + "u": 89, + "v": 90, + "w": 91, + "x": 92, + "y": 93, + "z": 94, + "{": 95, + "|": 96, + "}": 97, + "~": 98, + "¡": 99, + "¢": 100, + "£": 101, + "¤": 102, + "¥": 103, + "¦": 104, + "§": 105, + "¨": 106, + "©": 107, + "ª": 108, + "«": 109, + "¬": 110, + "®": 111, + "¯": 112, + "°": 113, + "±": 114, + "²": 115, + "³": 116, + "´": 117, + "µ": 118, + "¶": 119, + "·": 120, + "¸": 121, + "¹": 122, + "º": 123, + "»": 124, + "¼": 125, + "½": 126, + "¾": 127, + "¿": 128, + "À": 129, + "Á": 130, + "Â": 131, + "Ã": 132, + "Ä": 133, + "Å": 134, + "Æ": 135, + "Ç": 136, + "È": 137, + "É": 138, + "Ê": 139, + "Ë": 140, + "Ì": 141, + "Í": 142, + "Î": 143, + "Ï": 144, + "Ð": 145, + "Ñ": 146, + "Ò": 147, + "Ó": 148, + "Ô": 149, + "Õ": 150, + "Ö": 151, + "×": 152, + "Ø": 153, + "Ù": 154, + "Ú": 155, + "Û": 156, + "Ü": 157, + "Ý": 158, + "Þ": 159, + "ß": 160, + "à": 161, + "á": 162, + "â": 163, + "ã": 164, + "ä": 165, + "å": 166, + "æ": 167, + "ç": 168, + "è": 169, + "é": 170, + "ê": 171, + "ë": 172, + "ì": 173, + "í": 174, + "î": 175, + "ï": 176, + "ð": 177, + "ñ": 178, + "ò": 179, + "ó": 180, + "ô": 181, + "õ": 182, + "ö": 183, + "÷": 184, + "ø": 185, + "ù": 186, + "ú": 187, + "û": 188, + "ü": 189, + "ý": 190, + "þ": 191, + "ÿ": 192, + "Ā": 193, + "ā": 194, + "Ă": 195, + "ă": 196, + "Ą": 197, + "ą": 198, + "Ć": 199, + "ć": 200, + "Ĉ": 201, + "ĉ": 202, + "Ċ": 203, + "ċ": 204, + "Č": 205, + "č": 206, + "Ď": 207, + "ď": 208, + "Đ": 209, + "đ": 210, + "Ē": 211, + "ē": 212, + "Ĕ": 213, + "ĕ": 214, + "Ė": 215, + "ė": 216, + "Ę": 217, + "ę": 218, + "Ě": 219, + "ě": 220, + "Ĝ": 221, + "ĝ": 222, + "Ğ": 223, + "ğ": 224, + "Ġ": 225, + "ġ": 226, + "Ģ": 227, + "ģ": 228, + "Ĥ": 229, + "ĥ": 230, + "Ħ": 231, + "ħ": 232, + "Ĩ": 233, + "ĩ": 234, + "Ī": 235, + "ī": 236, + "Ĭ": 237, + "ĭ": 238, + "Į": 239, + "į": 240, + "İ": 241, + "ı": 242, + "IJ": 243, + "ij": 244, + "Ĵ": 245, + "ĵ": 246, + "Ķ": 247, + "ķ": 248, + "ĸ": 249, + "Ĺ": 250, + "ĺ": 251, + "Ļ": 252, + "ļ": 253, + "Ľ": 254, + "ľ": 255, + "Ŀ": 256, + "ŀ": 257, + "Ł": 258, + "ł": 259, + "Ń": 260, + "ĠĠ": 261, + "ĠĠĠĠ": 262, + "ĠĠĠ": 263, + "ĠĠĠĠĠĠĠĠ": 264, + "in": 265, + "00": 266, + "er": 267, + "on": 268, + "re": 269, + "at": 270, + "ĊĠĠĠĠ": 271, + "or": 272, + "Ġ\"": 273, + "Ġt": 274, + "en": 275, + "st": 276, + "an": 277, + "ar": 278, + "al": 279, + "Ġ0": 280, + "ĊĠĠĠĠĠĠĠĠ": 281, + "as": 282, + "ĊĠĠĠ": 283, + "Ġ<": 284, + "ĊĠĠĠĠĠĠĠ": 285, + "he": 286, + "it": 287, + "bo": 288, + "ed": 289, + "ion": 290, + "ur": 291, + "le": 292, + "ct": 293, + "Ġc": 294, + "Ġs": 295, + "es": 296, + "wl": 297, + "obo": 298, + "Ġr": 299, + "et": 300, + "//": 301, + "om": 302, + "=\"": 303, + "ing": 304, + "li": 305, + "de": 306, + "Ġ1": 307, + "Ġ*": 308, + "Ġf": 309, + "is": 310, + "Ġ=": 311, + "Ġo": 312, + "ht": 313, + "am": 314, + "Ġp": 315, + "ce": 316, + "ut": 317, + "Ġ-": 318, + "": 392, + "se": 393, + "ction": 394, + "Ġre": 395, + "ĠA": 396, + "br": 397, + "im": 398, + "ary": 399, + "10": 400, + "ate": 401, + "12": 402, + "Ġth": 403, + "est": 404, + "ap": 405, + "ty": 406, + "ĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠ": 407, + "str": 408, + "Ġv": 409, + "ource": 410, + "Ġrdf": 411, + "ĠC": 412, + "ig": 413, + "url": 414, + "ck": 415, + "um": 416, + "Owl": 417, + "oboIn": 418, + "oboInOwl": 419, + "per": 420, + "iv": 421, + "ter": 422, + "ĠT": 423, + "15": 424, + "ated": 425, + "20": 426, + "pt": 427, + "/>": 428, + "ag": 429, + "Ġ[": 430, + "brary": 431, + "ue": 432, + "Ġ4": 433, + "ff": 434, + "library": 435, + "ont": 436, + "purl": 437, + "obolibrary": 438, + "ww": 439, + "16": 440, + "Class": 441, + "ĠS": 442, + "Ġ;": 443, + "Ġand": 444, + "has": 445, + "ab": 446, + "Ġ:": 447, + "æĿ": 448, + "ON": 449, + "19": 450, + "åı": 451, + "Ġ": 460, + "ra": 461, + "32": 462, + "Ġ8": 463, + "ER": 464, + "Ġ'": 465, + "os": 466, + "ann": 467, + "ĠM": 468, + "ode": 469, + "..": 470, + "13": 471, + "Ġ},": 472, + "----": 473, + "Ġl": 474, + "iz": 475, + "æĿij": 476, + "Ġis": 477, + "oc": 478, + "24": 479, + "ĠF": 480, + "ine": 481, + "int": 482, + "18": 483, + "ype": 484, + "ith": 485, + "resource": 486, + "Ċĉ": 487, + "30": 488, + "def": 489, + "all": 490, + "ect": 491, + "sp": 492, + "ĠI": 493, + "==": 494, + "17": 495, + "annot": 496, + "ĠP": 497, + "****": 498, + "com": 499, + "user": 500, + "->": 501, + "ow": 502, + "50": 503, + "ĊĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠ": 504, + "op": 505, + "od": 506, + "Ġ7": 507, + "ĠE": 508, + "put": 509, + "ĠD": 510, + "art": 511, + "ym": 512, + "åĮ": 513, + "annotated": 514, + "end": 515, + "99": 516, + "60": 517, + "arg": 518, + "Ġfor": 519, + "ver": 520, + "urn": 521, + "éķ": 522, + "Ġe": 523, + "ss": 524, + "ata": 525, + "64": 526, + "for": 527, + "ĠĠĠĠĠ": 528, + "Str": 529, + "set": 530, + "ĠB": 531, + "rom": 532, + "äº": 533, + "ir": 534, + "è·": 535, + "ma": 536, + "Ċĉĉ": 537, + "rdf": 538, + "__": 539, + "ĠN": 540, + "sh": 541, + "è·¯": 542, + "éķĩ": 543, + "up": 544, + "ort": 545, + "roper": 546, + "IN": 547, + "25": 548, + "Ġst": 549, + "åį": 550, + "ke": 551, + "34": 552, + "lab": 553, + "ber": 554, + "string": 555, + "Ġclass": 556, + "turn": 557, + "Ġ.": 558, + "Re": 559, + "AAAA": 560, + "ĊĊ": 561, + "28": 562, + "bl": 563, + "roperty": 564, + "act": 565, + "ĠL": 566, + "90": 567, + "https": 568, + "ID": 569, + "che": 570, + "Ġan": 571, + "rdfs": 572, + "Ġbe": 573, + "ment": 574, + "age": 575, + "å¤": 576, + "Ġif": 577, + "><": 578, + "av": 579, + "ject": 580, + "),": 581, + "40": 582, + "sub": 583, + "ers": 584, + "alue": 585, + "åħ": 586, + "span": 587, + "yn": 588, + "],": 589, + "ity": 590, + "ĠG": 591, + "ip": 592, + "ile": 593, + "Ġ5": 594, + "Ġowl": 595, + "ew": 596, + "ess": 597, + "ite": 598, + "22": 599, + "www": 600, + "EN": 601, + "tr": 602, + "ure": 603, + "te": 604, + "âĢ": 605, + "å®": 606, + "11": 607, + "ore": 608, + "con": 609, + "Ġy": 610, + "70": 611, + "ive": 612, + "Ġthat": 613, + "è¡": 614, + "33": 615, + "uct": 616, + "out": 617, + "ult": 618, + "ption": 619, + "di": 620, + "Db": 621, + "label": 622, + "Ġ6": 623, + "ase": 624, + "ang": 625, + "Ġsh": 626, + "type": 627, + "Ġor": 628, + "Ġon": 629, + "Ġcon": 630, + "29": 631, + "ĠĠĠĠĠĠ": 632, + "lic": 633, + "åĮº": 634, + "oun": 635, + "Property": 636, + "Xref": 637, + "DbXref": 638, + "hasDbXref": 639, + "TE": 640, + "app": 641, + "ink": 642, + "cri": 643, + "38": 644, + "ĊĠĠĠĠĠĠĠĠĠ": 645, + "27": 646, + "ist": 647, + "Of": 648, + "35": 649, + "çļ": 650, + "å·": 651, + "ly": 652, + "Ġpro": 653, + "ac": 654, + "AR": 655, + "()": 656, + "Ġwh": 657, + "##": 658, + "ï¼": 659, + "ay": 660, + "çļĦ": 661, + "ken": 662, + "ack": 663, + "ãĢ": 664, + "68": 665, + "åı·": 666, + "able": 667, + "der": 668, + "31": 669, + "UB": 670, + "Ex": 671, + "æĸ": 672, + "Ġreturn": 673, + "37": 674, + "iom": 675, + "Ġ/*": 676, + "ĊĊĠĠĠ": 677, + "port": 678, + "arget": 679, + "ef": 680, + "44": 681, + "Ġ&": 682, + "Ġit": 683, + "65": 684, + "39": 685, + "è¡Ĺ": 686, + "ĊĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠ": 687, + "ize": 688, + "67": 689, + "amp": 690, + "ome": 691, + "Ġ9": 692, + "',": 693, + "Ġint": 694, + "String": 695, + "Ġ_": 696, + "TI": 697, + "ace": 698, + "ĠR": 699, + "Ġ#": 700, + "Ġ|": 701, + "Ġthis": 702, + "--------": 703, + "Ġ\\": 704, + "OR": 705, + "riction": 706, + "Rest": 707, + "48": 708, + "ant": 709, + "Ġwith": 710, + "ĠW": 711, + "Restriction": 712, + "ml": 713, + "form": 714, + "ĊĠĠ": 715, + "Ġu": 716, + "one": 717, + "ft": 718, + "oo": 719, + "GO": 720, + "69": 721, + "ä¹": 722, + "))": 723, + "onym": 724, + "ial": 725, + "ind": 726, + "55": 727, + "off": 728, + "Ġ//": 729, + "err": 730, + "Ġhref": 731, + "Syn": 732, + "********": 733, + "elf": 734, + "200": 735, + "ember": 736, + "ight": 737, + "æľ": 738, + "UBER": 739, + "UBERON": 740, + "ren": 741, + "Ġde": 742, + "Ġal": 743, + "ain": 744, + "Ġex": 745, + "Ax": 746, + "21": 747, + "token": 748, + "Un": 749, + "dr": 750, + "ĠO": 751, + "ath": 752, + "bject": 753, + "åįĹ": 754, + "Synonym": 755, + "../": 756, + "Ġtr": 757, + "lw": 758, + "lay": 759, + "Loc": 760, + "Ġ38": 761, + "ug": 762, + "pro": 763, + "Target": 764, + "::": 765, + "01": 766, + "Ġ==": 767, + "olor": 768, + "oid": 769, + "ues": 770, + "26": 771, + "58": 772, + "cl": 773, + "ust": 774, + "ia": 775, + "ï¼Į": 776, + "ition": 777, + "log": 778, + "ong": 779, + "ve": 780, + "RO": 781, + "====": 782, + "ET": 783, + "name": 784, + "IA": 785, + "arsh": 786, + "Axiom": 787, + "><": 847, + "ou": 848, + "bout": 849, + "fo": 850, + "Ġun": 851, + "key": 852, + "98": 853, + "ix": 854, + "ert": 855, + "escri": 856, + "define": 857, + "ä»": 858, + "();": 859, + "举": 860, + "å±": 861, + "ere": 862, + "dd": 863, + "éģ": 864, + "RE": 865, + "code": 866, + "ire": 867, + "test": 868, + "fs": 869, + "æ²": 870, + "AL": 871, + "åľ": 872, + "ans": 873, + "Game": 874, + "æ±": 875, + "ithub": 876, + "čĊ": 877, + "66": 878, + "erm": 879, + "00000": 880, + "å°": 881, + "ard": 882, + "23": 883, + "gr": 884, + "offsetStart": 885, + "AAAAAAAA": 886, + "github": 887, + "115": 888, + "pec": 889, + "Ġse": 890, + "77": 891, + "output": 892, + "ik": 893, + "AC": 894, + "ost": 895, + "cont": 896, + "ĊĠĠĠĠĠĠ": 897, + "link": 898, + "Ġfrom": 899, + "ED": 900, + "ML": 901, + "chema": 902, + "ĊĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠ": 903, + "cc": 904, + "sion": 905, + "ell": 906, + "par": 907, + "vent": 908, + "Ġx": 909, + "rite": 910, + "vi": 911, + "ount": 912, + "AT": 913, + "yst": 914, + "Ġat": 915, + "Ġi": 916, + "Text": 917, + "Ġ10": 918, + "çĶ": 919, + "ä¸Ģ": 920, + "ib": 921, + "Ġ)": 922, + "ĠThe": 923, + "ime": 924, + "Ġare": 925, + "88": 926, + "ies": 927, + "Ġtype": 928, + "Struct": 929, + "ance": 930, + "cre": 931, + "IAO": 932, + "get": 933, + "Ġ20": 934, + "idth": 935, + "å··": 936, + "but": 937, + "FF": 938, + "low": 939, + "mp": 940, + "æĺ": 941, + "ence": 942, + "æĸ°": 943, + "Ar": 944, + "An": 945, + "59": 946, + "54": 947, + "ph": 948, + "Ġle": 949, + "},": 950, + "ll": 951, + ".": 1024, + "ĊĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠĠ": 1025, + "The": 1026, + "!--": 1027, + "Exact": 1028, + "74": 1029, + "value": 1030, + "some": 1031, + "hasExact": 1032, + "hasExactSynonym": 1033, + "éĻ": 1034, + "åĪ": 1035, + "text": 1036, + "ribut": 1037, + "po": 1038, + "Ġvoid": 1039, + "annotatedSource": 1040, + "ys": 1041, + "annotatedProperty": 1042, + "ich": 1043, + "ES": 1044, + "05": 1045, + "Ġ$": 1046, + "Marshal": 1047, + "fa": 1048, + "ValuesFrom": 1049, + "own": 1050, + "Ġhttp": 1051, + "struct": 1052, + "ser": 1053, + "éĩ": 1054, + "und": 1055, + "orm": 1056, + "ld": 1057, + "someValuesFrom": 1058, + "pos": 1059, + "Ġen": 1060, + "Read": 1061, + "vel": 1062, + "ork": 1063, + "sym": 1064, + "AN": 1065, + "åŃ": 1066, + "æ¥": 1067, + "eth": 1068, + "ge": 1069, + "lock": 1070, + "pre": 1071, + "Ġerr": 1072, + "pace": 1073, + "ĠAs": 1074, + "atype": 1075, + "af": 1076, + "('": 1077, + "56": 1078, + "ific": 1079, + "Ġyou": 1080, + "ä¸Ń": 1081, + "lect": 1082, + "Ġ\",": 1083, + "datatype": 1084, + "XMLS": 1085, + "XMLSchema": 1086, + "Ġ/": 1087, + "AD": 1088, + "Ġnew": 1089, + "ock": 1090, + "ival": 1091, + "TR": 1092, + "TION": 1093, + "file": 1094, + "Ġlbl": 1095, + "ud": 1096, + "pi": 1097, + "Ġbo": 1098, + "94": 1099, + "HE": 1100, + "ne": 1101, + "atic": 1102, + "uff": 1103, + "ĠTh": 1104, + "OT": 1105, + "ĠMember": 1106, + "UN": 1107, + "Ġvalue": 1108, + "mapping": 1109, + "og": 1110, + "Ġdef": 1111, + "return": 1112, + "Ġ