calc-gpt-2 / tokenizer.json
timo13113's picture
Training in progress, step 100
566b065 verified
raw
history blame
3.97 kB
{
"version": "1.0",
"truncation": {
"direction": "Right",
"max_length": 20,
"strategy": "LongestFirst",
"stride": 0
},
"padding": {
"strategy": "BatchLongest",
"direction": "Right",
"pad_to_multiple_of": null,
"pad_id": 0,
"pad_type_id": 0,
"pad_token": "<|endoftext|>"
},
"added_tokens": [
{
"id": 0,
"content": "<|endoftext|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Digits",
"individual_digits": true
},
"post_processor": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": false,
"use_regex": true
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"<|endoftext|>": 0,
"\n": 1,
"\r": 2,
"$": 3,
"+": 4,
"0": 5,
"1": 6,
"2": 7,
"3": 8,
"4": 9,
"5": 10,
"6": 11,
"7": 12,
"8": 13,
"9": 14,
";": 15,
"=": 16,
"$$": 17,
";=": 18,
"$=": 19,
"$+": 20,
"+;": 21,
"=+": 22,
"$;": 23,
";;": 24,
"==": 25,
"++": 26,
";$": 27,
";+": 28,
"=;": 29,
"=$": 30,
"+$": 31,
"+=": 32,
";$=": 33,
"$+=": 34,
"=;=": 35,
"$+$": 36,
"+$=": 37,
"=$;": 38,
";=$": 39,
";=;": 40,
"$+;": 41,
"=+$": 42,
"==;": 43,
"+$$": 44,
"+;=": 45,
"=$$": 46,
"$$=": 47,
";=+": 48,
"$==": 49,
"=++": 50,
"++=": 51,
"$;=": 52,
"+$+": 53,
";$;": 54,
";++": 55,
"=$=": 56,
"=+;": 57,
"==+": 58,
"$$$": 59,
"$$;": 60,
"$$;;": 61,
"$=$;": 62,
"+;+": 63,
"+;+;": 64,
"$;+": 65,
";;$": 66,
"===": 67,
"++$": 68,
";+$": 69,
"\r\n": 70,
"+=+": 71,
"+$;": 72,
"+==": 73,
"+=$": 74,
";$$": 75,
";;=": 76,
";$+": 77,
";+;": 78,
";;=;": 79,
"=$+": 80,
"=;;": 81,
"=;$": 82,
"=$+$": 83,
"==+$": 84,
"$$+": 85,
"$$$$": 86,
"$$$;": 87,
"$$=;": 88,
"$$;=+": 89,
";==": 90,
";==+": 91,
";=++": 92,
";=;$": 93,
";==;": 94,
"$=+": 95,
"$=++": 96,
"$=;+": 97,
"$==++": 98,
"$++": 99
},
"merges": [
"$ $",
"; =",
"$ =",
"$ +",
"+ ;",
"= +",
"$ ;",
"; ;",
"= =",
"+ +",
"; $",
"; +",
"= ;",
"= $",
"+ $",
"+ =",
"; $=",
"$+ =",
"= ;=",
"$+ $",
"+ $=",
"= $;",
";= $",
";= ;",
"$+ ;",
"=+ $",
"== ;",
"+ $$",
"+ ;=",
"= $$",
"$$ =",
";= +",
"$= =",
"=+ +",
"++ =",
"$ ;=",
"+ $+",
"; $;",
"; ++",
"= $=",
"= +;",
"= =+",
"$$ $",
"$$ ;",
"$$ ;;",
"$= $;",
"+; +",
"+; +;",
"$; +",
";; $",
"== =",
"++ $",
";+ $",
"\r \n",
"+ =+",
"+ $;",
"+ ==",
"+ =$",
"; $$",
"; ;=",
"; $+",
"; +;",
"; ;=;",
"= $+",
"= ;;",
"= ;$",
"= $+$",
"= =+$",
"$$ +",
"$$ $$",
"$$ $;",
"$$ =;",
"$$ ;=+",
";= =",
";= =+",
";= ++",
";= ;$",
";= =;",
"$= +",
"$= ++",
"$= ;+",
"$= =++",
"$+ +"
]
}
}