GeminiFan207 committed on
Commit
aa3647a
·
verified ·
1 Parent(s): 39bb318

Create tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +45 -0
tokenizer.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "model": "BPE",
4
+ "vocab": {
5
+ "<|begoftext|>": 0,
6
+ "<|endoftext|>": 1,
7
+ "<pad>": 2,
8
+ "<unk>": 3,
9
+ "the": 4,
10
+ "a": 5,
11
+ "and": 6,
12
+ "to": 7,
13
+ "of": 8,
14
+ "in": 9,
15
+ "I": 10,
16
+ "is": 11,
17
+ "it": 12,
18
+ ".": 13,
19
+ ",": 14,
20
+ "th": 15,
21
+ "an": 16,
22
+ "ing": 17,
23
+ "er": 18,
24
+ "on": 19
25
+ },
26
+ "merges": [
27
+ "t h",
28
+ "th e",
29
+ "a n",
30
+ "an d",
31
+ "i n",
32
+ "o f",
33
+ "t o",
34
+ "i s",
35
+ "in g",
36
+ "e r",
37
+ "o n"
38
+ ],
39
+ "special_tokens": {
40
+ "pad_token": "<pad>",
41
+ "bos_token": "<|begoftext|>",
42
+ "eos_token": "<|endoftext|>",
43
+ "unk_token": "<unk>"
44
+ }
45
+ }