PyTorch
English
bert
jdrechsel commited on
Commit
28e5ec4
·
verified ·
1 Parent(s): 887332a

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ ":=": 29484,
3
+ "Bmatrix": 29485,
4
+ "Vmatrix": 29486,
5
+ "\\%": 29481,
6
+ "\\Alpha": 28996,
7
+ "\\And": 28997,
8
+ "\\Arrowvert": 28998,
9
+ "\\Beta": 28999,
10
+ "\\Cap": 29000,
11
+ "\\Chi": 29001,
12
+ "\\Cup": 29002,
13
+ "\\Delta": 29003,
14
+ "\\Doteq": 29004,
15
+ "\\Downarrow": 29005,
16
+ "\\Epsilon": 29006,
17
+ "\\Eta": 29007,
18
+ "\\Gamma": 29008,
19
+ "\\Iota": 29009,
20
+ "\\Join": 29010,
21
+ "\\Kappa": 29011,
22
+ "\\Lambda": 29012,
23
+ "\\Leftarrow": 29013,
24
+ "\\Leftrightarrow": 29014,
25
+ "\\Lleftarrow": 29015,
26
+ "\\Longleftarrow": 29016,
27
+ "\\Longleftrightarrow": 29017,
28
+ "\\Longrightarrow": 29018,
29
+ "\\Lsh": 29019,
30
+ "\\Mu": 29020,
31
+ "\\Nu": 29021,
32
+ "\\Omega": 29022,
33
+ "\\Omicron": 29023,
34
+ "\\Phi": 29024,
35
+ "\\Pi": 29025,
36
+ "\\Pr": 29026,
37
+ "\\Psi": 29027,
38
+ "\\Re": 29028,
39
+ "\\Rho": 29029,
40
+ "\\Rightarrow": 29030,
41
+ "\\Rrightarrow": 29031,
42
+ "\\Rsh": 29032,
43
+ "\\Sigma": 29033,
44
+ "\\Subset": 29034,
45
+ "\\Supset": 29035,
46
+ "\\Tau": 29036,
47
+ "\\Theta": 29037,
48
+ "\\Uparrow": 29038,
49
+ "\\Updownarrow": 29039,
50
+ "\\Upsilon": 29040,
51
+ "\\VarLambda": 29041,
52
+ "\\VarOmega": 29042,
53
+ "\\Vert": 29043,
54
+ "\\Xi": 29044,
55
+ "\\Zeta": 29045,
56
+ "\\\\": 29483,
57
+ "\\^": 29482,
58
+ "\\above": 29046,
59
+ "\\aleph": 29047,
60
+ "\\alpha": 29048,
61
+ "\\amalg": 29049,
62
+ "\\angle": 29050,
63
+ "\\approx": 29051,
64
+ "\\approxeq": 29052,
65
+ "\\arccos": 29053,
66
+ "\\arcsin": 29054,
67
+ "\\arctan": 29055,
68
+ "\\arg": 29056,
69
+ "\\arrowvert": 29057,
70
+ "\\ast": 29058,
71
+ "\\asymp": 29059,
72
+ "\\atop": 29060,
73
+ "\\backepsilon": 29061,
74
+ "\\backsim": 29062,
75
+ "\\backsimeq": 29063,
76
+ "\\backslash": 29064,
77
+ "\\barwedge": 29065,
78
+ "\\begin": 29069,
79
+ "\\beta": 29066,
80
+ "\\beth": 29067,
81
+ "\\between": 29068,
82
+ "\\bigcap": 29070,
83
+ "\\bigcirc": 29071,
84
+ "\\bigcup": 29072,
85
+ "\\bigodot": 29073,
86
+ "\\bigoplus": 29074,
87
+ "\\bigotimes": 29075,
88
+ "\\bigsqcup": 29076,
89
+ "\\bigtriangledown": 29077,
90
+ "\\bigtriangleup": 29078,
91
+ "\\biguplus": 29079,
92
+ "\\bigvee": 29080,
93
+ "\\bigwedge": 29081,
94
+ "\\binom": 29082,
95
+ "\\bmod": 29083,
96
+ "\\bot": 29084,
97
+ "\\bowtie": 29085,
98
+ "\\bracevert": 29086,
99
+ "\\brack": 29087,
100
+ "\\buildrel": 29088,
101
+ "\\cap": 29089,
102
+ "\\cdot": 29090,
103
+ "\\cdots": 29091,
104
+ "\\cfrac": 29092,
105
+ "\\chi": 29093,
106
+ "\\choose": 29094,
107
+ "\\circ": 29095,
108
+ "\\circeq": 29096,
109
+ "\\circlearrowleft": 29097,
110
+ "\\circlearrowright": 29098,
111
+ "\\colon": 29099,
112
+ "\\cong": 29100,
113
+ "\\coprod": 29101,
114
+ "\\cos": 29102,
115
+ "\\cosh": 29103,
116
+ "\\cot": 29104,
117
+ "\\coth": 29105,
118
+ "\\cr": 29106,
119
+ "\\csc": 29107,
120
+ "\\cup": 29108,
121
+ "\\curlyeqprec": 29109,
122
+ "\\curlyeqsucc": 29110,
123
+ "\\curlyvee": 29111,
124
+ "\\curlywedge": 29112,
125
+ "\\curvearrowleft": 29113,
126
+ "\\curvearrowright": 29114,
127
+ "\\dashv": 29115,
128
+ "\\dbinom": 29116,
129
+ "\\ddddot": 29117,
130
+ "\\dddot": 29118,
131
+ "\\ddot": 29119,
132
+ "\\ddots": 29120,
133
+ "\\deg": 29121,
134
+ "\\delta": 29122,
135
+ "\\det": 29123,
136
+ "\\dfrac": 29124,
137
+ "\\dim": 29125,
138
+ "\\displaystyle": 29126,
139
+ "\\div": 29127,
140
+ "\\divideontimes": 29128,
141
+ "\\dot=": 29137,
142
+ "\\doteq": 29136,
143
+ "\\dotplus": 29129,
144
+ "\\dots": 29130,
145
+ "\\dotsb": 29131,
146
+ "\\dotsc": 29132,
147
+ "\\dotsi": 29133,
148
+ "\\dotsm": 29134,
149
+ "\\dotso": 29135,
150
+ "\\downarrow": 29138,
151
+ "\\downdownarrows": 29139,
152
+ "\\downharpoonleft": 29140,
153
+ "\\downharpoonright": 29141,
154
+ "\\ell": 29142,
155
+ "\\empty": 29143,
156
+ "\\emptyset": 29144,
157
+ "\\end": 29145,
158
+ "\\enspace": 29146,
159
+ "\\epsilon": 29147,
160
+ "\\eqcirc": 29148,
161
+ "\\eqsim": 29149,
162
+ "\\eqslantgtr": 29150,
163
+ "\\eqslantless": 29151,
164
+ "\\equiv": 29152,
165
+ "\\eta": 29153,
166
+ "\\eth": 29154,
167
+ "\\exists": 29155,
168
+ "\\exp": 29156,
169
+ "\\forall": 29157,
170
+ "\\frac": 29158,
171
+ "\\frown": 29159,
172
+ "\\gamma": 29160,
173
+ "\\gcd": 29161,
174
+ "\\ge": 29162,
175
+ "\\geq": 29163,
176
+ "\\geqq": 29164,
177
+ "\\geqslant": 29165,
178
+ "\\gets": 29166,
179
+ "\\gg": 29167,
180
+ "\\ggg": 29168,
181
+ "\\gggtr": 29169,
182
+ "\\gnapprox": 29170,
183
+ "\\gneq": 29171,
184
+ "\\gneqq": 29172,
185
+ "\\gnsim": 29173,
186
+ "\\gt": 29174,
187
+ "\\gtrapprox": 29175,
188
+ "\\gtrdot": 29176,
189
+ "\\gtreqless": 29177,
190
+ "\\gtreqqless": 29178,
191
+ "\\gtrless": 29179,
192
+ "\\gtrsim": 29180,
193
+ "\\gvertneqq": 29181,
194
+ "\\hom": 29182,
195
+ "\\hookleftarrow": 29183,
196
+ "\\hookrightarrow": 29184,
197
+ "\\iddots": 29185,
198
+ "\\idotsint": 29186,
199
+ "\\iff": 29187,
200
+ "\\iiiint": 29188,
201
+ "\\iiint": 29189,
202
+ "\\iint": 29190,
203
+ "\\imath": 29191,
204
+ "\\impliedby": 29192,
205
+ "\\implies": 29193,
206
+ "\\in": 29194,
207
+ "\\inf": 29195,
208
+ "\\infty": 29196,
209
+ "\\injlim": 29197,
210
+ "\\int": 29198,
211
+ "\\intop": 29199,
212
+ "\\iota": 29200,
213
+ "\\jmath": 29201,
214
+ "\\kappa": 29202,
215
+ "\\ker": 29203,
216
+ "\\lVert": 29204,
217
+ "\\lambda": 29205,
218
+ "\\land": 29206,
219
+ "\\langle": 29207,
220
+ "\\lbrace": 29208,
221
+ "\\lbrack": 29210,
222
+ "\\lceil": 29209,
223
+ "\\ldots": 29211,
224
+ "\\le": 29212,
225
+ "\\leadsto": 29213,
226
+ "\\left": 29214,
227
+ "\\leftarrow": 29215,
228
+ "\\leftarrowtail": 29216,
229
+ "\\leftharpoondown": 29217,
230
+ "\\leftharpoonup": 29218,
231
+ "\\leftleftarrows": 29219,
232
+ "\\leftrightarrow": 29220,
233
+ "\\leftrightarrows": 29221,
234
+ "\\leftrightharpoons": 29222,
235
+ "\\leftrightsquigarrow": 29223,
236
+ "\\leq": 29224,
237
+ "\\leqq": 29225,
238
+ "\\leqslant": 29226,
239
+ "\\lessapprox": 29227,
240
+ "\\lessdot": 29228,
241
+ "\\lesssim": 29229,
242
+ "\\lfloor": 29230,
243
+ "\\lg": 29231,
244
+ "\\lgroup": 29232,
245
+ "\\lim": 29233,
246
+ "\\liminf": 29234,
247
+ "\\limsup": 29235,
248
+ "\\ll": 29236,
249
+ "\\lmoustache": 29237,
250
+ "\\ln": 29238,
251
+ "\\lnapprox": 29239,
252
+ "\\lneq": 29240,
253
+ "\\lneqq": 29241,
254
+ "\\lnot": 29242,
255
+ "\\log": 29243,
256
+ "\\longleftarrow": 29244,
257
+ "\\longleftrightarrow": 29245,
258
+ "\\longmapsto": 29246,
259
+ "\\longrightarrow": 29247,
260
+ "\\looparrowleft": 29248,
261
+ "\\looparrowright": 29249,
262
+ "\\lor": 29250,
263
+ "\\lt": 29251,
264
+ "\\ltimes": 29252,
265
+ "\\lvert": 29253,
266
+ "\\lvertneqq": 29254,
267
+ "\\mapsto": 29255,
268
+ "\\mathbb": 29256,
269
+ "\\mathcal": 29257,
270
+ "\\mathrm": 29258,
271
+ "\\max": 29259,
272
+ "\\measuredangle": 29260,
273
+ "\\mho": 29261,
274
+ "\\mid": 29262,
275
+ "\\min": 29263,
276
+ "\\mod": 29264,
277
+ "\\mp": 29265,
278
+ "\\mu": 29266,
279
+ "\\multimap": 29267,
280
+ "\\nLeftarrow": 29268,
281
+ "\\nLeftrightarrow": 29269,
282
+ "\\nRightarrow": 29270,
283
+ "\\nabla": 29271,
284
+ "\\ncong": 29272,
285
+ "\\ne": 29273,
286
+ "\\nearrow": 29274,
287
+ "\\neg": 29275,
288
+ "\\neq": 29276,
289
+ "\\newline": 29277,
290
+ "\\nexists": 29278,
291
+ "\\ngeq": 29279,
292
+ "\\ngeqq": 29280,
293
+ "\\ngeqslant": 29281,
294
+ "\\ngtr": 29282,
295
+ "\\ni": 29283,
296
+ "\\nleftarrow": 29284,
297
+ "\\nleftrightarrow": 29285,
298
+ "\\nleq": 29286,
299
+ "\\nleqq": 29287,
300
+ "\\nleqslant": 29288,
301
+ "\\nless": 29289,
302
+ "\\nmid": 29290,
303
+ "\\not": 29291,
304
+ "\\nparallel": 29292,
305
+ "\\nprec": 29293,
306
+ "\\npreceq": 29294,
307
+ "\\nrightarrow": 29295,
308
+ "\\nsim": 29296,
309
+ "\\nsubseteq": 29297,
310
+ "\\nsucc": 29298,
311
+ "\\nsucceq": 29299,
312
+ "\\nsupseteq": 29300,
313
+ "\\nu": 29301,
314
+ "\\nwarrow": 29302,
315
+ "\\odot": 29303,
316
+ "\\of": 29304,
317
+ "\\oint": 29305,
318
+ "\\omega": 29306,
319
+ "\\omicron": 29307,
320
+ "\\ominus": 29308,
321
+ "\\operatorname": 29310,
322
+ "\\oplus": 29309,
323
+ "\\otimes": 29311,
324
+ "\\over": 29312,
325
+ "\\overleftarrow": 29313,
326
+ "\\overline": 29314,
327
+ "\\overrightarrow": 29315,
328
+ "\\overset": 29316,
329
+ "\\owns": 29317,
330
+ "\\parallel": 29318,
331
+ "\\partial": 29319,
332
+ "\\perp": 29320,
333
+ "\\phi": 29321,
334
+ "\\pi": 29322,
335
+ "\\pm": 29323,
336
+ "\\pmod": 29324,
337
+ "\\pod": 29325,
338
+ "\\prec": 29326,
339
+ "\\precapprox": 29327,
340
+ "\\preccurlyeq": 29328,
341
+ "\\preceq": 29329,
342
+ "\\precnapprox": 29330,
343
+ "\\precneqq": 29331,
344
+ "\\precnsim": 29332,
345
+ "\\precsim": 29333,
346
+ "\\prime": 29334,
347
+ "\\prod": 29335,
348
+ "\\projlim": 29336,
349
+ "\\propto": 29337,
350
+ "\\psi": 29338,
351
+ "\\qquad": 29339,
352
+ "\\quad": 29340,
353
+ "\\qvar": 29341,
354
+ "\\rVert": 29342,
355
+ "\\rangle": 29343,
356
+ "\\rbrace": 29344,
357
+ "\\rbrack": 29345,
358
+ "\\rceil": 29346,
359
+ "\\rfloor": 29347,
360
+ "\\rgroup": 29348,
361
+ "\\rho": 29349,
362
+ "\\right": 29350,
363
+ "\\rightarrow": 29351,
364
+ "\\rightarrowtail": 29352,
365
+ "\\rightharpoondown": 29353,
366
+ "\\rightharpoonup": 29354,
367
+ "\\rightleftarrows": 29355,
368
+ "\\rightleftharpoons": 29356,
369
+ "\\rightrightarrows": 29357,
370
+ "\\rightsquigarrow": 29358,
371
+ "\\rmoustache": 29359,
372
+ "\\root": 29360,
373
+ "\\rtimes": 29361,
374
+ "\\rvert": 29362,
375
+ "\\scriptscriptstyle": 29363,
376
+ "\\scriptstyle": 29364,
377
+ "\\searrow": 29365,
378
+ "\\sec": 29366,
379
+ "\\setminus": 29367,
380
+ "\\sgn": 29368,
381
+ "\\sigma": 29369,
382
+ "\\sign": 29370,
383
+ "\\signum": 29371,
384
+ "\\sim": 29372,
385
+ "\\simeq": 29373,
386
+ "\\sin": 29374,
387
+ "\\sinh": 29375,
388
+ "\\smallfrown": 29376,
389
+ "\\smallint": 29377,
390
+ "\\smallsetminus": 29378,
391
+ "\\smallsmile": 29379,
392
+ "\\sphericalangle": 29380,
393
+ "\\sqcap": 29381,
394
+ "\\sqcup": 29382,
395
+ "\\sqrt": 29383,
396
+ "\\sqsubset": 29384,
397
+ "\\sqsubseteq": 29385,
398
+ "\\sqsupset": 29386,
399
+ "\\sqsupseteq": 29387,
400
+ "\\stackrel": 29388,
401
+ "\\subset": 29389,
402
+ "\\subseteq": 29390,
403
+ "\\subseteqq": 29391,
404
+ "\\subsetneq": 29392,
405
+ "\\subsetneqq": 29393,
406
+ "\\succ": 29394,
407
+ "\\succapprox": 29395,
408
+ "\\succcurlyeq": 29396,
409
+ "\\succeq": 29397,
410
+ "\\succnapprox": 29398,
411
+ "\\succneqq": 29399,
412
+ "\\succnsim": 29400,
413
+ "\\succsim": 29401,
414
+ "\\sum": 29402,
415
+ "\\sup": 29403,
416
+ "\\supset": 29404,
417
+ "\\supseteq": 29405,
418
+ "\\supseteqq": 29406,
419
+ "\\supsetneq": 29407,
420
+ "\\supsetneqq": 29408,
421
+ "\\swarrow": 29409,
422
+ "\\tan": 29410,
423
+ "\\tanh": 29411,
424
+ "\\tau": 29412,
425
+ "\\tbinom": 29413,
426
+ "\\text": 29414,
427
+ "\\textstyle": 29415,
428
+ "\\tfrac": 29416,
429
+ "\\theta": 29417,
430
+ "\\thickapprox": 29418,
431
+ "\\thicksim": 29419,
432
+ "\\times": 29420,
433
+ "\\to": 29421,
434
+ "\\top": 29422,
435
+ "\\triangle": 29423,
436
+ "\\triangledown": 29424,
437
+ "\\triangleleft": 29425,
438
+ "\\trianglelefteq": 29426,
439
+ "\\triangleq": 29427,
440
+ "\\triangleright": 29428,
441
+ "\\trianglerighteq": 29429,
442
+ "\\underline": 29430,
443
+ "\\underset": 29431,
444
+ "\\unlhd": 29432,
445
+ "\\unrhd": 29433,
446
+ "\\uparrow": 29434,
447
+ "\\updownarrow": 29435,
448
+ "\\upharpoonleft": 29436,
449
+ "\\upharpoonright": 29437,
450
+ "\\uplus": 29438,
451
+ "\\upsilon": 29439,
452
+ "\\upuparrows": 29440,
453
+ "\\varDelta": 29441,
454
+ "\\varGamma": 29442,
455
+ "\\varPhi": 29443,
456
+ "\\varPi": 29444,
457
+ "\\varPsi": 29445,
458
+ "\\varSigma": 29446,
459
+ "\\varTheta": 29447,
460
+ "\\varUpsilon": 29448,
461
+ "\\varXi": 29449,
462
+ "\\varepsilon": 29450,
463
+ "\\varinjlim": 29451,
464
+ "\\varkappa": 29452,
465
+ "\\varliminf": 29453,
466
+ "\\varlimsup": 29454,
467
+ "\\varnothing": 29455,
468
+ "\\varphi": 29456,
469
+ "\\varpi": 29457,
470
+ "\\varprojlim": 29458,
471
+ "\\varpropto": 29459,
472
+ "\\varrho": 29460,
473
+ "\\varsigma": 29461,
474
+ "\\varsubsetneq": 29462,
475
+ "\\varsubsetneqq": 29463,
476
+ "\\varsupsetneq": 29464,
477
+ "\\varsupsetneqq": 29465,
478
+ "\\vartheta": 29466,
479
+ "\\vartriangle": 29467,
480
+ "\\vartriangleleft": 29468,
481
+ "\\vartriangleright": 29469,
482
+ "\\vdots": 29470,
483
+ "\\vec": 29471,
484
+ "\\vee": 29472,
485
+ "\\veebar": 29473,
486
+ "\\vert": 29474,
487
+ "\\wedge": 29475,
488
+ "\\wr": 29476,
489
+ "\\xi": 29477,
490
+ "\\xleftarrow": 29478,
491
+ "\\xrightarrow": 29479,
492
+ "\\zeta": 29480,
493
+ "align": 29487,
494
+ "alignat": 29488,
495
+ "alignedat": 29489,
496
+ "bmatrix": 29490,
497
+ "eqnarray": 29491,
498
+ "multline": 29492,
499
+ "pmatrix": 29493,
500
+ "smallmatrix": 29494,
501
+ "subarray": 29495,
502
+ "vmatrix": 29496
503
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": false,
4
+ "mask_token": "[MASK]",
5
+ "model_max_length": 512,
6
+ "name_or_path": "models/MPBERT_MF_MT_NMF_MFR",
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "special_tokens_map_file": null,
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "BertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff