Semojak committed on
Commit
b6a88c9
·
verified ·
1 Parent(s): 4aeb511

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +0 -0
  2. tokenizer_config.json +5 -5
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
- "content": "[PAD]",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
@@ -9,7 +9,7 @@
9
  "special": true
10
  },
11
  "1": {
12
- "content": "[UNK]",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
@@ -17,7 +17,7 @@
17
  "special": true
18
  },
19
  "2": {
20
- "content": "[CLS]",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
@@ -25,7 +25,7 @@
25
  "special": true
26
  },
27
  "3": {
28
- "content": "[SEP]",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
@@ -57,7 +57,7 @@
57
  "stride": 0,
58
  "strip_accents": null,
59
  "tokenize_chinese_chars": true,
60
- "tokenizer_class": "ElectraTokenizer",
61
  "truncation_side": "right",
62
  "truncation_strategy": "longest_first",
63
  "unk_token": "[UNK]"
 
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
+ "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
 
9
  "special": true
10
  },
11
  "1": {
12
+ "content": "[CLS]",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
 
17
  "special": true
18
  },
19
  "2": {
20
+ "content": "[SEP]",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
 
25
  "special": true
26
  },
27
  "3": {
28
+ "content": "[PAD]",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
 
57
  "stride": 0,
58
  "strip_accents": null,
59
  "tokenize_chinese_chars": true,
60
+ "tokenizer_class": "PreTrainedTokenizerFast",
61
  "truncation_side": "right",
62
  "truncation_strategy": "longest_first",
63
  "unk_token": "[UNK]"