Upload 4 files
Browse files
Switch to tiktoken encoding
- README.md +0 -1
- config.json +1 -1
README.md
CHANGED
@@ -3,7 +3,6 @@
|
|
3 |
ByteLevel BPE tokenizer trained on fhswf/tiny-stack dataset.
|
4 |
|
5 |
## Usage
|
6 |
-
|
7 |
```python
|
8 |
from tokenizers.implementations import ByteLevelBPETokenizer
|
9 |
from tokenizers.processors import BertProcessing
|
|
|
3 |
ByteLevel BPE tokenizer trained on fhswf/tiny-stack dataset.
|
4 |
|
5 |
## Usage
|
|
|
6 |
```python
|
7 |
from tokenizers.implementations import ByteLevelBPETokenizer
|
8 |
from tokenizers.processors import BertProcessing
|
config.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
-
"vocab_size": 52000,
|
3 |
"model_type": "gpt2",
|
|
|
4 |
"min_frequency": 2,
|
5 |
"special_tokens": [
|
6 |
"<s>",
|
|
|
1 |
{
|
|
|
2 |
"model_type": "gpt2",
|
3 |
+
"vocab_size": 52000,
|
4 |
"min_frequency": 2,
|
5 |
"special_tokens": [
|
6 |
"<s>",
|