pweb002 committed on
Commit
a148a53
·
verified ·
1 Parent(s): f1ffaf5

Upload 4 files

Browse files

Switch to tiktoken encoding

Files changed (2) hide show
  1. README.md +0 -1
  2. config.json +1 -1
README.md CHANGED
@@ -3,7 +3,6 @@
3
  ByteLevel BPE tokenizer trained on fhswf/tiny-stack dataset.
4
 
5
  ## Usage
6
-
7
  ```python
8
  from tokenizers.implementations import ByteLevelBPETokenizer
9
  from tokenizers.processors import BertProcessing
 
3
  ByteLevel BPE tokenizer trained on fhswf/tiny-stack dataset.
4
 
5
  ## Usage
 
6
  ```python
7
  from tokenizers.implementations import ByteLevelBPETokenizer
8
  from tokenizers.processors import BertProcessing
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "vocab_size": 52000,
3
  "model_type": "gpt2",
 
4
  "min_frequency": 2,
5
  "special_tokens": [
6
  "<s>",
 
1
  {
 
2
  "model_type": "gpt2",
3
+ "vocab_size": 52000,
4
  "min_frequency": 2,
5
  "special_tokens": [
6
  "<s>",