{
  "version": "1.0",
  "truncation": {
    "direction": "Right",
    "max_length": 20,
    "strategy": "LongestFirst",
    "stride": 0
  },
  "padding": {
    "strategy": "BatchLongest",
    "direction": "Right",
    "pad_to_multiple_of": null,
    "pad_id": 0,
    "pad_type_id": 0,
    "pad_token": "<|endoftext|>"
  },
  "added_tokens": [
    {
      "id": 0,
      "content": "<|endoftext|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Digits",
    "individual_digits": true
  },
  "post_processor": {
    "type": "ByteLevel",
    "add_prefix_space": true,
    "trim_offsets": false,
    "use_regex": true
  },
  "decoder": {
    "type": "ByteLevel",
    "add_prefix_space": true,
    "trim_offsets": true,
    "use_regex": true
  },
  "model": {
    "type": "BPE",
    "dropout": null,
    "unk_token": null,
    "continuing_subword_prefix": null,
    "end_of_word_suffix": null,
    "fuse_unk": false,
    "byte_fallback": false,
    "vocab": {
      "<|endoftext|>": 0,
      "\n": 1,
      "\r": 2,
      "$": 3,
      "+": 4,
      "0": 5,
      "1": 6,
      "2": 7,
      "3": 8,
      "4": 9,
      "5": 10,
      "6": 11,
      "7": 12,
      "8": 13,
      "9": 14,
      ";": 15,
      "=": 16,
      "$$": 17,
      ";=": 18,
      "$=": 19,
      "$+": 20,
      "+;": 21,
      "=+": 22,
      "$;": 23,
      ";;": 24,
      "==": 25,
      "++": 26,
      ";$": 27,
      ";+": 28,
      "=;": 29,
      "=$": 30,
      "+$": 31,
      "+=": 32,
      ";$=": 33,
      "$+=": 34,
      "=;=": 35,
      "$+$": 36,
      "+$=": 37,
      "=$;": 38,
      ";=$": 39,
      ";=;": 40,
      "$+;": 41,
      "=+$": 42,
      "==;": 43,
      "+$$": 44,
      "+;=": 45,
      "=$$": 46,
      "$$=": 47,
      ";=+": 48,
      "$==": 49,
      "=++": 50,
      "++=": 51,
      "$;=": 52,
      "+$+": 53,
      ";$;": 54,
      ";++": 55,
      "=$=": 56,
      "=+;": 57,
      "==+": 58,
      "$$$": 59,
      "$$;": 60,
      "$$;;": 61,
      "$=$;": 62,
      "+;+": 63,
      "+;+;": 64,
      "$;+": 65,
      ";;$": 66,
      "===": 67,
      "++$": 68,
      ";+$": 69,
      "\r\n": 70,
      "+=+": 71,
      "+$;": 72,
      "+==": 73,
      "+=$": 74,
      ";$$": 75,
      ";;=": 76,
      ";$+": 77,
      ";+;": 78,
      ";;=;": 79,
      "=$+": 80,
      "=;;": 81,
      "=;$": 82,
      "=$+$": 83,
      "==+$": 84,
      "$$+": 85,
      "$$$$": 86,
      "$$$;": 87,
      "$$=;": 88,
      "$$;=+": 89,
      ";==": 90,
      ";==+": 91,
      ";=++": 92,
      ";=;$": 93,
      ";==;": 94,
      "$=+": 95,
      "$=++": 96,
      "$=;+": 97,
      "$==++": 98,
      "$++": 99
    },
    "merges": [
      "$ $",
      "; =",
      "$ =",
      "$ +",
      "+ ;",
      "= +",
      "$ ;",
      "; ;",
      "= =",
      "+ +",
      "; $",
      "; +",
      "= ;",
      "= $",
      "+ $",
      "+ =",
      "; $=",
      "$+ =",
      "= ;=",
      "$+ $",
      "+ $=",
      "= $;",
      ";= $",
      ";= ;",
      "$+ ;",
      "=+ $",
      "== ;",
      "+ $$",
      "+ ;=",
      "= $$",
      "$$ =",
      ";= +",
      "$= =",
      "=+ +",
      "++ =",
      "$ ;=",
      "+ $+",
      "; $;",
      "; ++",
      "= $=",
      "= +;",
      "= =+",
      "$$ $",
      "$$ ;",
      "$$ ;;",
      "$= $;",
      "+; +",
      "+; +;",
      "$; +",
      ";; $",
      "== =",
      "++ $",
      ";+ $",
      "\r \n",
      "+ =+",
      "+ $;",
      "+ ==",
      "+ =$",
      "; $$",
      "; ;=",
      "; $+",
      "; +;",
      "; ;=;",
      "= $+",
      "= ;;",
      "= ;$",
      "= $+$",
      "= =+$",
      "$$ +",
      "$$ $$",
      "$$ $;",
      "$$ =;",
      "$$ ;=+",
      ";= =",
      ";= =+",
      ";= ++",
      ";= ;$",
      ";= =;",
      "$= +",
      "$= ++",
      "$= ;+",
      "$= =++",
      "$+ +"
    ]
  }
}