add tokenizer
tokenizer.json  +4 -2

@@ -53,7 +53,8 @@
   "pre_tokenizer": {
     "type": "ByteLevel",
     "add_prefix_space": false,
-    "trim_offsets": true
+    "trim_offsets": true,
+    "use_regex": true
   },
   "post_processor": {
     "type": "RobertaProcessing",
@@ -71,7 +72,8 @@
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,
-    "trim_offsets": true
+    "trim_offsets": true,
+    "use_regex": true
   },
   "model": {
     "type": "BPE",