Spaces:
Sleeping
Sleeping
{ | |
"version": "1.0", | |
"truncation": null, | |
"padding": null, | |
"added_tokens": [ | |
{ | |
"id": 0, | |
"content": "[STOP]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 1, | |
"content": "[UNK]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 2, | |
"content": "[SPACE]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
} | |
], | |
"normalizer": null, | |
"pre_tokenizer": { | |
"type": "Whitespace" | |
}, | |
"post_processor": null, | |
"decoder": null, | |
"model": { | |
"type": "BPE", | |
"dropout": null, | |
"unk_token": "[UNK]", | |
"continuing_subword_prefix": null, | |
"end_of_word_suffix": null, | |
"fuse_unk": false, | |
"byte_fallback": false, | |
"vocab": { | |
"[STOP]": 0, | |
"[UNK]": 1, | |
"[SPACE]": 2, | |
"-": 3, | |
"1": 4, | |
"2": 5, | |
"3": 6, | |
"4": 7, | |
"5": 8, | |
"a": 9, | |
"b": 10, | |
"c": 11, | |
"d": 12, | |
"e": 13, | |
"f": 14, | |
"g": 15, | |
"h": 16, | |
"i": 17, | |
"j": 18, | |
"k": 19, | |
"l": 20, | |
"m": 21, | |
"n": 22, | |
"o": 23, | |
"p": 24, | |
"q": 25, | |
"r": 26, | |
"s": 27, | |
"t": 28, | |
"u": 29, | |
"v": 30, | |
"w": 31, | |
"x": 32, | |
"y": 33, | |
"z": 34, | |
"“": 35, | |
"”": 36, | |
"…": 37, | |
"、": 38, | |
"。": 39, | |
"!": 40, | |
"(": 41, | |
")": 42, | |
",": 43, | |
":": 44, | |
";": 45, | |
"?": 46, | |
"an": 47, | |
"i4": 48, | |
"ng": 49, | |
"sh": 50, | |
"ang": 51, | |
"ji": 52, | |
"zh": 53, | |
"u4": 54, | |
"ao": 55, | |
"e5": 56, | |
"i1": 57, | |
"i2": 58, | |
"i3": 59, | |
"u3": 60, | |
"en": 61, | |
"xi": 62, | |
"an4": 63, | |
"ong": 64, | |
"de5": 65, | |
"ch": 66, | |
"e4": 67, | |
"eng": 68, | |
"an2": 69, | |
"uo": 70, | |
"an1": 71, | |
"ing": 72, | |
"u2": 73, | |
"a1": 74, | |
"ao4": 75, | |
"an3": 76, | |
"shi4": 77, | |
"ou3": 78, | |
"ai4": 79, | |
"u1": 80, | |
"e2": 81, | |
"ang4": 82, | |
"en2": 83, | |
"ong1": 84, | |
"a4": 85, | |
"ang1": 86, | |
"ao3": 87, | |
"li": 88, | |
"hu": 89, | |
"you3": 90, | |
"n1": 91, | |
"ang3": 92, | |
"yi1": 93, | |
"shi2": 94, | |
"ing2": 95, | |
"ei4": 96, | |
"yu": 97, | |
"ai2": 98, | |
"er": 99, | |
"ang2": 100, | |
"qi": 101, | |
"eng2": 102, | |
"e1": 103, | |
"e3": 104, | |
"ao1": 105, | |
"di": 106, | |
"wo": 107, | |
"bu4": 108, | |
"wo3": 109, | |
"uo4": 110, | |
"zai4": 111, | |
"ai3": 112, | |
"ou4": 113, | |
"n4": 114, | |
"eng1": 115, | |
"ong4": 116, | |
"me5": 117, | |
"gu": 118, | |
"en1": 119, | |
"shen2": 120, | |
"ui4": 121, | |
"le5": 122, | |
"ong2": 123, | |
"ren2": 124, | |
"a3": 125, | |
"jing": 126, | |
"ou1": 127, | |
"gong1": 128, | |
"uo2": 129, | |
"yi4": 130, | |
"er4": 131, | |
"ei2": 132, | |
"dao4": 133, | |
"zhong1": 134, | |
"ei3": 135, | |
"eng4": 136, | |
"he2": 137, | |
"san1": 138, | |
"xing": 139, | |
"wei4": 140, | |
"ji4": 141, | |
"n2": 142, | |
"uo1": 143, | |
"ta1": 144, | |
"ing4": 145, | |
"bi": 146, | |
"cheng2": 147, | |
"qi1": 148, | |
"wu3": 149, | |
"en4": 150, | |
"bai3": 151, | |
"en3": 152, | |
"shang4": 153, | |
"zhe4": 154, | |
"jia1": 155, | |
"ou2": 156, | |
"ge4": 157, | |
"di4": 158, | |
"yuan2": 159, | |
"xu": 160, | |
"da4": 161, | |
"a2": 162, | |
"li4": 163, | |
"n3": 164, | |
"ni3": 165, | |
"ai1": 166, | |
"xian4": 167, | |
"uo3": 168, | |
"yi3": 169, | |
"ni": 170, | |
"ti": 171, | |
"xin1": 172, | |
"jiu3": 173, | |
"qu": 174, | |
"ing1": 175, | |
"hui4": 176, | |
"si4": 177, | |
"zhi1": 178, | |
"ye4": 179, | |
"li3": 180, | |
"ji1": 181, | |
"lai2": 182, | |
"chu1": 183, | |
"dian4": 184, | |
"zhi4": 185, | |
"guo2": 186, | |
"ling2": 187, | |
"dian3": 188, | |
"er2": 189, | |
"ba1": 190, | |
"mi": 191, | |
"yao4": 192, | |
"jing1": 193, | |
"ao2": 194, | |
"si1": 195, | |
"ong3": 196, | |
"sheng1": 197, | |
"liu4": 198, | |
"shou3": 199, | |
"zhu": 200, | |
"dui4": 201, | |
"jian4": 202, | |
"men": 203, | |
"men5": 204, | |
"zheng4": 205, | |
"yi2": 206, | |
"nian2": 207, | |
"ji2": 208, | |
"ge1": 209, | |
"pi": 210, | |
"yi": 211, | |
"neng2": 212, | |
"zuo4": 213, | |
"fa1": 214, | |
"yu2": 215, | |
"ju4": 216, | |
"xing2": 217, | |
"ye3": 218, | |
"jiu4": 219, | |
"bei4": 220, | |
"jin4": 221, | |
"xia4": 222, | |
"ku": 223, | |
"jin1": 224, | |
"hou4": 225, | |
"qi3": 226, | |
"ju": 227, | |
"xi1": 228, | |
"wu4": 229, | |
"ming2": 230, | |
"ei1": 231, | |
"yue4": 232, | |
"chang3": 233, | |
"hua4": 234, | |
"chu": 235, | |
"qian1": 236, | |
"wan4": 237, | |
"eng3": 238, | |
"ing3": 239, | |
"jiang1": 240, | |
"qian2": 241, | |
"xiang4": 242, | |
"mei2": 243, | |
"xiao3": 244, | |
"fang2": 245, | |
"shu": 246, | |
"wei2": 247, | |
"ke3": 248, | |
"nan2": 249, | |
"hai2": 250, | |
"i5": 251, | |
"kai1": 252, | |
"dong4": 253, | |
"zi5": 254 | |
}, | |
"merges": [ | |
"a n", | |
"i 4", | |
"n g", | |
"s h", | |
"an g", | |
"j i", | |
"z h", | |
"u 4", | |
"a o", | |
"e 5", | |
"i 1", | |
"i 2", | |
"i 3", | |
"u 3", | |
"e n", | |
"x i", | |
"an 4", | |
"o ng", | |
"d e5", | |
"c h", | |
"e 4", | |
"e ng", | |
"an 2", | |
"u o", | |
"an 1", | |
"i ng", | |
"u 2", | |
"a 1", | |
"ao 4", | |
"an 3", | |
"sh i4", | |
"o u3", | |
"a i4", | |
"u 1", | |
"e 2", | |
"ang 4", | |
"en 2", | |
"ong 1", | |
"a 4", | |
"ang 1", | |
"ao 3", | |
"l i", | |
"h u", | |
"y ou3", | |
"n 1", | |
"ang 3", | |
"y i1", | |
"sh i2", | |
"ing 2", | |
"e i4", | |
"y u", | |
"a i2", | |
"e r", | |
"ang 2", | |
"q i", | |
"eng 2", | |
"e 1", | |
"e 3", | |
"ao 1", | |
"d i", | |
"w o", | |
"b u4", | |
"wo 3", | |
"uo 4", | |
"z ai4", | |
"a i3", | |
"o u4", | |
"n 4", | |
"eng 1", | |
"ong 4", | |
"m e5", | |
"g u", | |
"en 1", | |
"sh en2", | |
"u i4", | |
"l e5", | |
"ong 2", | |
"r en2", | |
"a 3", | |
"ji ng", | |
"o u1", | |
"g ong1", | |
"uo 2", | |
"y i4", | |
"er 4", | |
"e i2", | |
"d ao4", | |
"zh ong1", | |
"e i3", | |
"eng 4", | |
"h e2", | |
"s an1", | |
"xi ng", | |
"w ei4", | |
"j i4", | |
"n 2", | |
"uo 1", | |
"t a1", | |
"ing 4", | |
"b i", | |
"ch eng2", | |
"q i1", | |
"w u3", | |
"en 4", | |
"b ai3", | |
"en 3", | |
"sh ang4", | |
"zh e4", | |
"ji a1", | |
"o u2", | |
"g e4", | |
"d i4", | |
"yu an2", | |
"x u", | |
"d a4", | |
"a 2", | |
"l i4", | |
"n 3", | |
"n i3", | |
"a i1", | |
"xi an4", | |
"uo 3", | |
"y i3", | |
"n i", | |
"t i", | |
"xi n1", | |
"ji u3", | |
"q u", | |
"ing 1", | |
"hu i4", | |
"s i4", | |
"zh i1", | |
"y e4", | |
"l i3", | |
"ji 1", | |
"l ai2", | |
"ch u1", | |
"di an4", | |
"zh i4", | |
"g uo2", | |
"l ing2", | |
"di an3", | |
"er 2", | |
"b a1", | |
"m i", | |
"y ao4", | |
"jing 1", | |
"ao 2", | |
"s i1", | |
"ong 3", | |
"sh eng1", | |
"li u4", | |
"sh ou3", | |
"zh u", | |
"d ui4", | |
"ji an4", | |
"m en", | |
"men 5", | |
"zh eng4", | |
"y i2", | |
"ni an2", | |
"ji 2", | |
"g e1", | |
"p i", | |
"y i", | |
"n eng2", | |
"z uo4", | |
"f a1", | |
"y u2", | |
"j u4", | |
"xing 2", | |
"y e3", | |
"ji u4", | |
"b ei4", | |
"ji n4", | |
"xi a4", | |
"k u", | |
"ji n1", | |
"h ou4", | |
"q i3", | |
"j u", | |
"x i1", | |
"w u4", | |
"m ing2", | |
"e i1", | |
"yu e4", | |
"ch ang3", | |
"hu a4", | |
"ch u", | |
"qi an1", | |
"w an4", | |
"eng 3", | |
"ing 3", | |
"ji ang1", | |
"qi an2", | |
"xi ang4", | |
"m ei2", | |
"xi ao3", | |
"f ang2", | |
"sh u", | |
"w ei2", | |
"k e3", | |
"n an2", | |
"h ai2", | |
"i 5", | |
"k ai1", | |
"d ong4", | |
"z i5" | |
] | |
} | |
} |