BioTokenizer-BFD-UNI-100 / tokenizer.json
dotan1111's picture
Upload 2 files
e35952a
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<UNK>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Lowercase"
},
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "Unigram",
"unk_id": 0,
"vocab": [
[
"<UNK>",
0.0
],
[
"i",
-3.0203980367124537
],
[
"t",
-3.062956886044912
],
[
"k",
-3.082041895330107
],
[
"f",
-3.103728012092901
],
[
"q",
-3.135578439066535
],
[
"a",
-3.1735611832016133
],
[
"e",
-3.2414619148167
],
[
"s",
-3.245056006002681
],
[
"r",
-3.2453138014528733
],
[
"d",
-3.247259733320952
],
[
"g",
-3.2735768039326345
],
[
"l",
-3.2758473315755303
],
[
"n",
-3.2874450246220093
],
[
"p",
-3.3366652351080646
],
[
"v",
-3.4634740370979564
],
[
"h",
-3.5246874642761234
],
[
"y",
-3.5423092969956507
],
[
"m",
-3.7572360683510553
],
[
"w",
-4.1068255516984
],
[
"c",
-4.1691497756131
],
[
"aa",
-4.585326076591901
],
[
"la",
-4.887530882982123
],
[
"al",
-4.992849418633071
],
[
"ll",
-5.029207021549425
],
[
"ag",
-5.034617504795474
],
[
"gg",
-5.209981982112646
],
[
"rr",
-5.219453257678689
],
[
"va",
-5.284565871717094
],
[
"av",
-5.309113365418225
],
[
"lg",
-5.335946291080051
],
[
"ar",
-5.346487427622183
],
[
"ga",
-5.347768184015516
],
[
"rl",
-5.359188586198691
],
[
"ra",
-5.388899095021582
],
[
"lv",
-5.415610462404311
],
[
"vl",
-5.442687278673805
],
[
"pa",
-5.4566928056054405
],
[
"gl",
-5.457117261276412
],
[
"lr",
-5.502800422610287
],
[
"vv",
-5.5241406506219
],
[
"gr",
-5.610282811468741
],
[
"gv",
-5.634492053238818
],
[
"ae",
-5.636273072018625
],
[
"ls",
-5.649664173419566
],
[
"sg",
-5.651066151953925
],
[
"vg",
-5.699240313159557
],
[
"pg",
-5.705567860524408
],
[
"sl",
-5.714505001184756
],
[
"sa",
-5.72066748094986
],
[
"as",
-5.726946393087237
],
[
"dl",
-5.731060965222051
],
[
"el",
-5.735435604764186
],
[
"ss",
-5.73815657796286
],
[
"da",
-5.743365731914071
],
[
"rg",
-5.748123393774561
],
[
"le",
-5.7677881761217655
],
[
"ia",
-5.776944415806298
],
[
"ta",
-5.77737040425999
],
[
"ld",
-5.782381548600611
],
[
"ea",
-5.805221911324413
],
[
"tl",
-5.815791178543998
],
[
"ad",
-5.831030225259809
],
[
"dg",
-5.842558129083246
],
[
"lp",
-5.842667174329858
],
[
"tg",
-5.856628213333442
],
[
"gs",
-5.88330933978234
],
[
"rv",
-5.887710110137677
],
[
"pl",
-5.901498352812656
],
[
"er",
-5.92200381405967
],
[
"x",
-5.933807364971587
],
[
"at",
-5.943452569412951
],
[
"rs",
-5.9468087421559535
],
[
"vr",
-5.964121749447516
],
[
"pv",
-5.977135678629862
],
[
"ve",
-5.980805107024498
],
[
"lt",
-5.986052614892426
],
[
"ap",
-5.995057905935505
],
[
"ge",
-6.010117098429783
],
[
"pp",
-6.027936858412414
],
[
"re",
-6.041928176100955
],
[
"dv",
-6.043940338964116
],
[
"tv",
-6.059045706661092
],
[
"ig",
-6.061067377196126
],
[
"vs",
-6.06266048412966
],
[
"gd",
-6.09875857638583
],
[
"vd",
-6.118779472304132
],
[
"sv",
-6.128847930365273
],
[
"rp",
-6.138786760719885
],
[
"ee",
-6.142724936012062
],
[
"ps",
-6.153316985162256
],
[
"de",
-6.204250643531047
],
[
"il",
-6.207346894537773
],
[
"ev",
-6.242737226311572
],
[
"rd",
-6.269418524522775
],
[
"sp",
-6.338806140439619
],
[
"u",
-17.79582990121073
],
[
"b",
-17.996473404241875
],
[
"z",
-19.23728665450807
],
[
"o",
-20.68728665451499
]
]
}
}