Transformers · tokenizer_v1 / tokenizer.json
Uploaded by Hack90 ("Upload tokenizer", commit f6472a3)
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<|endoftext|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": null,
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<|endoftext|>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<|endoftext|>": 0,
"a": 1,
"t": 2,
"c": 3,
"g": 4,
"n": 5,
"k": 6,
"z": 7,
"m": 8,
"l": 9,
"1": 10,
"2": 11,
"3": 12,
"4": 13,
"5": 14,
"6": 15,
"7": 16,
"8": 17,
"9": 18,
"10": 19,
"11": 20,
"12": 21,
"13": 22,
"14": 23,
"15": 24,
"16": 25,
"17": 26,
"18": 27,
"19": 28,
"20": 29,
"21": 30,
"22": 31,
"23": 32,
"24": 33,
"25": 34,
"26": 35,
"27": 36,
"28": 37,
"29": 38,
"30": 39,
"31": 40,
"32": 41,
"33": 42,
"34": 43,
"35": 44,
"36": 45,
"37": 46,
"38": 47,
"39": 48,
"40": 49,
"41": 50,
"42": 51,
"43": 52,
"44": 53,
"45": 54,
"46": 55,
"47": 56,
"48": 57,
"49": 58,
"50": 59,
"51": 60,
"52": 61,
"53": 62,
"54": 63,
"55": 64,
"56": 65,
"57": 66,
"58": 67,
"59": 68,
"60": 69,
"61": 70,
"62": 71,
"63": 72,
"64": 73,
"65": 74,
"66": 75,
"67": 76,
"68": 77,
"69": 78,
"70": 79,
"71": 80,
"72": 81,
"73": 82,
"74": 83,
"75": 84,
"76": 85,
"77": 86,
"78": 87,
"79": 88,
"80": 89,
"81": 90,
"82": 91,
"83": 92,
"84": 93,
"85": 94,
"86": 95,
"87": 96,
"88": 97,
"89": 98,
"90": 99,
"91": 100,
"92": 101,
"93": 102,
"94": 103,
"95": 104,
"96": 105,
"97": 106,
"98": 107,
"99": 108,
"100": 109
},
"merges": []
}
}
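The file above is a standard Hugging Face `tokenizers` serialization: a byte-level BPE model with an empty merge list, a vocabulary of single nucleotide-style characters (a, t, c, g, n, k, z, m, l) plus the numerals 1 through 100, and `<|endoftext|>` as the only special token (id 0, also used as the unknown token). Below is a minimal usage sketch; the local filename and the sample sequence are illustrative assumptions, not part of the repository.

```python
# Minimal sketch: load the config above and encode a sample sequence.
# Assumes the JSON is saved locally as "tokenizer.json" and the
# `tokenizers` package is installed.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")

# "atcgn" is an illustrative sample. Every character is a single-symbol
# vocab entry, and with an empty "merges" list BPE never combines symbols,
# so the encoding should come back as one id per character.
enc = tok.encode("atcgn")
print(enc.tokens)  # expected: ['a', 't', 'c', 'g', 'n']
print(enc.ids)     # expected: [1, 2, 3, 4, 5]

# Characters outside the vocabulary map to the unk token <|endoftext|>
# (id 0), since "byte_fallback" is false.
print(tok.encode("x").ids)  # expected: [0]
```

For use with `transformers` models, the same file can be wrapped via `PreTrainedTokenizerFast(tokenizer_file="tokenizer.json")`, which exposes the usual `__call__`/`decode` interface on top of this config.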