Commit
·
9a3a666
1
Parent(s):
39da080
Upload tpi_latn_5mb tokenizer.
Browse files- added_tokens.json +1 -0
- special_tokens_map.json +1 -0
- spiece.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"[XXXXX86]": 40691, "[XXXXX7]": 40612, "[XXXXX44]": 40649, "[XXXXX265]": 40870, "[XXXXX84]": 40689, "[XXXXX247]": 40852, "[XXXXX233]": 40838, "[XXXXX21]": 40626, "[XXXXX286]": 40891, "[XXXXX139]": 40744, "[XXXXX159]": 40764, "[XXXXX333]": 40938, "[XXXXX97]": 40702, "[XXXXX45]": 40650, "[XXXXX1]": 40606, "[XXXXX306]": 40911, "[XXXXX352]": 40957, "[XXXXX239]": 40844, "[XXXXX269]": 40874, "[XXXXX254]": 40859, "[XXXXX263]": 40868, "[XXXXX2]": 40607, "[XXXXX27]": 40632, "[XXXXX88]": 40693, "[XXXXX143]": 40748, "[CLS]": 40601, "[XXXXX330]": 40935, "[XXXXX231]": 40836, "[XXXXX119]": 40724, "[XXXXX180]": 40785, "[XXXXX32]": 40637, "[XXXXX153]": 40758, "[XXXXX297]": 40902, "[XXXXX353]": 40958, "[XXXXX298]": 40903, "[XXXXX273]": 40878, "[XXXXX38]": 40643, "[XXXXX149]": 40754, "[XXXXX278]": 40883, "<pad>": 40603, "[XXXXX310]": 40915, "[XXXXX94]": 40699, "[XXXXX217]": 40822, "[XXXXX325]": 40930, "[XXXXX245]": 40850, "[XXXXX329]": 40934, "[XXXXX206]": 40811, "[XXXXX146]": 40751, "[XXXXX72]": 40677, "[XXXXX70]": 40675, "[XXXXX15]": 40620, "[XXXXX125]": 40730, "[XXXXX66]": 40671, "[XXXXX114]": 40719, "[XXXXX277]": 40882, "[XXXXX171]": 40776, "[XXXXX22]": 40627, "[XXXXX225]": 40830, "[XXXXX281]": 40886, "[XXXXX337]": 40942, "[XXXXX47]": 40652, "[XXXXX164]": 40769, "[XXXXX222]": 40827, "[XXXXX147]": 40752, "[XXXXX82]": 40687, "[XXXXX313]": 40918, "[XXXXX170]": 40775, "[XXXXX8]": 40613, "[XXXXX271]": 40876, "[XXXXX3]": 40608, "[XXXXX148]": 40753, "[XXXXX5]": 40610, "[XXXXX336]": 40941, "[XXXXX203]": 40808, "[XXXXX207]": 40812, "[XXXXX218]": 40823, "[XXXXX61]": 40666, "[XXXXX340]": 40945, "[XXXXX288]": 40893, "[XXXXX184]": 40789, "[XXXXX0]": 40605, "[XXXXX261]": 40866, "[XXXXX255]": 40860, "[XXXXX35]": 40640, "[XXXXX106]": 40711, "[XXXXX341]": 40946, "[XXXXX187]": 40792, "[XXXXX168]": 40773, "[XXXXX227]": 40832, "[XXXXX122]": 40727, "[XXXXX270]": 40875, "[XXXXX323]": 40928, "[XXXXX57]": 40662, "[XXXXX101]": 40706, "[XXXXX295]": 40900, "[XXXXX48]": 40653, "[XXXXX178]": 40783, "[XXXXX136]": 40741, "[XXXXX25]": 40630, "[XXXXX60]": 40665, "[XXXXX129]": 40734, "[XXXXX17]": 40622, "[XXXXX18]": 40623, "[XXXXX166]": 40771, "[XXXXX287]": 40892, "[XXXXX118]": 40723, "[XXXXX324]": 40929, "[XXXXX320]": 40925, "[XXXXX177]": 40782, "[XXXXX68]": 40673, "[XXXXX219]": 40824, "[XXXXX326]": 40931, "[XXXXX59]": 40664, "[XXXXX79]": 40684, "[XXXXX55]": 40660, "[XXXXX93]": 40698, "[XXXXX144]": 40749, "[XXXXX317]": 40922, "[XXXXX63]": 40668, "[XXXXX283]": 40888, "[XXXXX30]": 40635, "[XXXXX235]": 40840, "[XXXXX158]": 40763, "[XXXXX220]": 40825, "[XXXXX221]": 40826, "[XXXXX321]": 40926, "[XXXXX284]": 40889, "[XXXXX127]": 40732, "[XXXXX54]": 40659, "[XXXXX304]": 40909, "[XXXXX115]": 40720, "[XXXXX175]": 40780, "[XXXXX208]": 40813, "[XXXXX98]": 40703, "[XXXXX240]": 40845, "[XXXXX96]": 40701, "[XXXXX290]": 40895, "[XXXXX309]": 40914, "[XXXXX267]": 40872, "[XXXXX338]": 40943, "[MASK]": 40604, "[XXXXX350]": 40955, "[XXXXX351]": 40956, "[XXXXX42]": 40647, "[XXXXX138]": 40743, "[XXXXX280]": 40885, "[XXXXX300]": 40905, "[XXXXX215]": 40820, "[XXXXX19]": 40624, "[XXXXX161]": 40766, "[XXXXX89]": 40694, "[XXXXX76]": 40681, "[XXXXX162]": 40767, "[XXXXX132]": 40737, "[XXXXX37]": 40642, "[XXXXX251]": 40856, "[XXXXX141]": 40746, "[XXXXX28]": 40633, "[XXXXX103]": 40708, "[XXXXX49]": 40654, "[XXXXX318]": 40923, "[XXXXX56]": 40661, "[XXXXX250]": 40855, "[XXXXX123]": 40728, "[XXXXX305]": 40910, "[XXXXX163]": 40768, "[XXXXX91]": 40696, "[XXXXX110]": 40715, "[XXXXX296]": 40901, "[XXXXX124]": 40729, "[XXXXX193]": 40798, "[XXXXX335]": 40940, "[XXXXX216]": 40821, "[XXXXX64]": 40669, "[XXXXX160]": 40765, "[XXXXX169]": 40774, "[XXXXX259]": 40864, "[XXXXX289]": 40894, "[XXXXX249]": 40854, "[XXXXX348]": 40953, "[XXXXX43]": 40648, "[XXXXX189]": 40794, "[XXXXX211]": 40816, "[XXXXX322]": 40927, "[XXXXX62]": 40667, "[XXXXX10]": 40615, "[XXXXX67]": 40672, "[XXXXX312]": 40917, "[XXXXX228]": 40833, "[XXXXX111]": 40716, "[XXXXX12]": 40617, "[XXXXX230]": 40835, "[XXXXX186]": 40791, "[XXXXX116]": 40721, "[XXXXX343]": 40948, "[XXXXX134]": 40739, "[XXXXX346]": 40951, "[XXXXX308]": 40913, "[XXXXX182]": 40787, "[XXXXX223]": 40828, "[XXXXX205]": 40810, "[XXXXX268]": 40873, "[XXXXX107]": 40712, "[XXXXX301]": 40906, "[XXXXX24]": 40629, "[XXXXX185]": 40790, "[XXXXX274]": 40879, "[XXXXX199]": 40804, "[XXXXX191]": 40796, "[XXXXX87]": 40692, "[XXXXX73]": 40678, "[XXXXX142]": 40747, "[XXXXX179]": 40784, "[XXXXX26]": 40631, "[XXXXX264]": 40869, "[XXXXX266]": 40871, "[XXXXX197]": 40802, "[XXXXX156]": 40761, "[XXXXX34]": 40639, "[XXXXX39]": 40644, "[XXXXX327]": 40932, "[XXXXX334]": 40939, "[XXXXX224]": 40829, "[XXXXX234]": 40839, "[XXXXX99]": 40704, "[XXXXX113]": 40718, "[XXXXX14]": 40619, "[XXXXX302]": 40907, "[XXXXX16]": 40621, "[XXXXX85]": 40690, "[XXXXX257]": 40862, "[XXXXX354]": 40959, "[XXXXX238]": 40843, "[XXXXX150]": 40755, "[XXXXX112]": 40717, "[XXXXX244]": 40849, "[XXXXX276]": 40881, "[XXXXX74]": 40679, "[XXXXX314]": 40919, "[XXXXX316]": 40921, "[XXXXX293]": 40898, "[XXXXX121]": 40726, "[XXXXX155]": 40760, "[XXXXX4]": 40609, "[XXXXX133]": 40738, "[XXXXX131]": 40736, "[XXXXX31]": 40636, "[XXXXX252]": 40857, "[XXXXX165]": 40770, "[XXXXX51]": 40656, "[XXXXX109]": 40714, "[XXXXX20]": 40625, "[XXXXX272]": 40877, "[XXXXX344]": 40949, "[XXXXX92]": 40697, "[XXXXX291]": 40896, "[XXXXX345]": 40950, "[XXXXX174]": 40779, "[XXXXX151]": 40756, "[XXXXX90]": 40695, "[XXXXX33]": 40638, "[XXXXX258]": 40863, "[XXXXX140]": 40745, "[XXXXX183]": 40788, "[XXXXX210]": 40815, "[XXXXX236]": 40841, "[XXXXX120]": 40725, "[XXXXX53]": 40658, "[XXXXX226]": 40831, "[XXXXX292]": 40897, "[XXXXX77]": 40682, "[XXXXX190]": 40795, "[XXXXX81]": 40686, "[XXXXX181]": 40786, "[XXXXX196]": 40801, "[SEP]": 40602, "[XXXXX260]": 40865, "[XXXXX11]": 40616, "[XXXXX117]": 40722, "[XXXXX202]": 40807, "[XXXXX71]": 40676, "[XXXXX95]": 40700, "[XXXXX192]": 40797, "[XXXXX209]": 40814, "[XXXXX36]": 40641, "[XXXXX242]": 40847, "[XXXXX137]": 40742, "[XXXXX58]": 40663, "[XXXXX256]": 40861, "[XXXXX294]": 40899, "[XXXXX83]": 40688, "[XXXXX157]": 40762, "[XXXXX331]": 40936, "[XXXXX349]": 40954, "[XXXXX145]": 40750, "[XXXXX46]": 40651, "[XXXXX246]": 40851, "[XXXXX41]": 40646, "[XXXXX232]": 40837, "[XXXXX248]": 40853, "[XXXXX105]": 40710, "[XXXXX69]": 40674, "[XXXXX195]": 40800, "[XXXXX285]": 40890, "[XXXXX75]": 40680, "[XXXXX78]": 40683, "[XXXXX328]": 40933, "[XXXXX342]": 40947, "[XXXXX200]": 40805, "[XXXXX332]": 40937, "[XXXXX201]": 40806, "[XXXXX108]": 40713, "[XXXXX104]": 40709, "[XXXXX172]": 40777, "[XXXXX65]": 40670, "[XXXXX23]": 40628, "[XXXXX275]": 40880, "[XXXXX213]": 40818, "[XXXXX135]": 40740, "[XXXXX253]": 40858, "[XXXXX173]": 40778, "[XXXXX154]": 40759, "[XXXXX315]": 40920, "[XXXXX198]": 40803, "[XXXXX303]": 40908, "[XXXXX52]": 40657, "[XXXXX347]": 40952, "[XXXXX100]": 40705, "[XXXXX299]": 40904, "[XXXXX319]": 40924, "[XXXXX80]": 40685, "[XXXXX229]": 40834, "[XXXXX311]": 40916, "[XXXXX188]": 40793, "[XXXXX152]": 40757, "[XXXXX9]": 40614, "[XXXXX204]": 40809, "[XXXXX279]": 40884, "[XXXXX40]": 40645, "[XXXXX126]": 40731, "[XXXXX339]": 40944, "[XXXXX243]": 40848, "[XXXXX50]": 40655, "[XXXXX128]": 40733, "[XXXXX176]": 40781, "[XXXXX29]": 40634, "[XXXXX282]": 40887, "[XXXXX212]": 40817, "[XXXXX262]": 40867, "[XXXXX194]": 40799, "[XXXXX214]": 40819, "[XXXXX13]": 40618, "[XXXXX102]": 40707, "[XXXXX241]": 40846, "[XXXXX167]": 40772, "[XXXXX6]": 40611, "[XXXXX237]": 40842, "[XXXXX130]": 40735, "[XXXXX307]": 40912}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]", "[XXXXX85]", "[XXXXX86]", "[XXXXX87]", "[XXXXX88]", "[XXXXX89]", "[XXXXX90]", "[XXXXX91]", "[XXXXX92]", "[XXXXX93]", "[XXXXX94]", "[XXXXX95]", "[XXXXX96]", "[XXXXX97]", "[XXXXX98]", "[XXXXX99]", "[XXXXX100]", "[XXXXX101]", "[XXXXX102]", "[XXXXX103]", "[XXXXX104]", "[XXXXX105]", "[XXXXX106]", "[XXXXX107]", "[XXXXX108]", "[XXXXX109]", "[XXXXX110]", "[XXXXX111]", "[XXXXX112]", "[XXXXX113]", "[XXXXX114]", "[XXXXX115]", "[XXXXX116]", "[XXXXX117]", "[XXXXX118]", "[XXXXX119]", "[XXXXX120]", "[XXXXX121]", "[XXXXX122]", "[XXXXX123]", "[XXXXX124]", "[XXXXX125]", "[XXXXX126]", "[XXXXX127]", "[XXXXX128]", "[XXXXX129]", "[XXXXX130]", "[XXXXX131]", "[XXXXX132]", "[XXXXX133]", "[XXXXX134]", "[XXXXX135]", "[XXXXX136]", "[XXXXX137]", "[XXXXX138]", "[XXXXX139]", "[XXXXX140]", "[XXXXX141]", "[XXXXX142]", "[XXXXX143]", "[XXXXX144]", "[XXXXX145]", "[XXXXX146]", "[XXXXX147]", "[XXXXX148]", "[XXXXX149]", "[XXXXX150]", "[XXXXX151]", "[XXXXX152]", "[XXXXX153]", "[XXXXX154]", "[XXXXX155]", "[XXXXX156]", "[XXXXX157]", "[XXXXX158]", "[XXXXX159]", "[XXXXX160]", "[XXXXX161]", "[XXXXX162]", "[XXXXX163]", "[XXXXX164]", "[XXXXX165]", "[XXXXX166]", "[XXXXX167]", "[XXXXX168]", "[XXXXX169]", "[XXXXX170]", "[XXXXX171]", "[XXXXX172]", "[XXXXX173]", "[XXXXX174]", "[XXXXX175]", "[XXXXX176]", "[XXXXX177]", "[XXXXX178]", "[XXXXX179]", "[XXXXX180]", "[XXXXX181]", "[XXXXX182]", "[XXXXX183]", "[XXXXX184]", "[XXXXX185]", "[XXXXX186]", "[XXXXX187]", "[XXXXX188]", "[XXXXX189]", "[XXXXX190]", "[XXXXX191]", "[XXXXX192]", "[XXXXX193]", "[XXXXX194]", "[XXXXX195]", "[XXXXX196]", "[XXXXX197]", "[XXXXX198]", "[XXXXX199]", "[XXXXX200]", "[XXXXX201]", "[XXXXX202]", "[XXXXX203]", "[XXXXX204]", "[XXXXX205]", "[XXXXX206]", "[XXXXX207]", "[XXXXX208]", "[XXXXX209]", "[XXXXX210]", "[XXXXX211]", "[XXXXX212]", "[XXXXX213]", "[XXXXX214]", "[XXXXX215]", "[XXXXX216]", "[XXXXX217]", "[XXXXX218]", "[XXXXX219]", "[XXXXX220]", "[XXXXX221]", "[XXXXX222]", "[XXXXX223]", "[XXXXX224]", "[XXXXX225]", "[XXXXX226]", "[XXXXX227]", "[XXXXX228]", "[XXXXX229]", "[XXXXX230]", "[XXXXX231]", "[XXXXX232]", "[XXXXX233]", "[XXXXX234]", "[XXXXX235]", "[XXXXX236]", "[XXXXX237]", "[XXXXX238]", "[XXXXX239]", "[XXXXX240]", "[XXXXX241]", "[XXXXX242]", "[XXXXX243]", "[XXXXX244]", "[XXXXX245]", "[XXXXX246]", "[XXXXX247]", "[XXXXX248]", "[XXXXX249]", "[XXXXX250]", "[XXXXX251]", "[XXXXX252]", "[XXXXX253]", "[XXXXX254]", "[XXXXX255]", "[XXXXX256]", "[XXXXX257]", "[XXXXX258]", "[XXXXX259]", "[XXXXX260]", "[XXXXX261]", "[XXXXX262]", "[XXXXX263]", "[XXXXX264]", "[XXXXX265]", "[XXXXX266]", "[XXXXX267]", "[XXXXX268]", "[XXXXX269]", "[XXXXX270]", "[XXXXX271]", "[XXXXX272]", "[XXXXX273]", "[XXXXX274]", "[XXXXX275]", "[XXXXX276]", "[XXXXX277]", "[XXXXX278]", "[XXXXX279]", "[XXXXX280]", "[XXXXX281]", "[XXXXX282]", "[XXXXX283]", "[XXXXX284]", "[XXXXX285]", "[XXXXX286]", "[XXXXX287]", "[XXXXX288]", "[XXXXX289]", "[XXXXX290]", "[XXXXX291]", "[XXXXX292]", "[XXXXX293]", "[XXXXX294]", "[XXXXX295]", "[XXXXX296]", "[XXXXX297]", "[XXXXX298]", "[XXXXX299]", "[XXXXX300]", "[XXXXX301]", "[XXXXX302]", "[XXXXX303]", "[XXXXX304]", "[XXXXX305]", "[XXXXX306]", "[XXXXX307]", "[XXXXX308]", "[XXXXX309]", "[XXXXX310]", "[XXXXX311]", "[XXXXX312]", "[XXXXX313]", "[XXXXX314]", "[XXXXX315]", "[XXXXX316]", "[XXXXX317]", "[XXXXX318]", "[XXXXX319]", "[XXXXX320]", "[XXXXX321]", "[XXXXX322]", "[XXXXX323]", "[XXXXX324]", "[XXXXX325]", "[XXXXX326]", "[XXXXX327]", "[XXXXX328]", "[XXXXX329]", "[XXXXX330]", "[XXXXX331]", "[XXXXX332]", "[XXXXX333]", "[XXXXX334]", "[XXXXX335]", "[XXXXX336]", "[XXXXX337]", "[XXXXX338]", "[XXXXX339]", "[XXXXX340]", "[XXXXX341]", "[XXXXX342]", "[XXXXX343]", "[XXXXX344]", "[XXXXX345]", "[XXXXX346]", "[XXXXX347]", "[XXXXX348]", "[XXXXX349]", "[XXXXX350]", "[XXXXX351]", "[XXXXX352]", "[XXXXX353]", "[XXXXX354]"]}
|
spiece.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:185167d491bad1ae7ae8d0e5e8ea0204f724670ac0c21ef3d697c132413b72f3
|
3 |
+
size 860102
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/5mb/tpi_latn_5mb", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/5mb/tpi_latn_5mb/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}
|