feat: add pickle support
Browse files- tokenization_arcade100k.py +10 -0
tokenization_arcade100k.py
CHANGED
@@ -137,6 +137,16 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
|
|
137 |
def __len__(self):
    """Return the size of the vocabulary as reported by `self.tokenizer.n_vocab`."""
    encoding = self.tokenizer
    return encoding.n_vocab
|
139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
@property
def vocab_size(self):
    """Number of entries in the vocabulary, read from `self.tokenizer.n_vocab`."""
    encoding = self.tokenizer
    return encoding.n_vocab
|
|
|
137 |
def __len__(self):
    """Return the size of the vocabulary as reported by `self.tokenizer.n_vocab`."""
    encoding = self.tokenizer
    return encoding.n_vocab
|
139 |
|
140 |
+
def __getstate__(self):
    """Return the picklable state: a shallow copy of the instance dict
    without the ``"tokenizer"`` entry.

    Required for `pickle` support. The instance itself is left untouched;
    only the returned copy lacks the key.

    Raises:
        KeyError: if the instance has no ``"tokenizer"`` attribute.
    """
    snapshot = dict(self.__dict__)
    # NOTE(review): presumably the tiktoken encoding object cannot be
    # pickled directly, hence it is dropped here -- confirm.
    snapshot.pop("tokenizer")
    return snapshot
|
145 |
+
|
146 |
+
def __setstate__(self, state):
    """Restore the instance from ``state`` and rebuild ``self.tokenizer``.

    The entries of ``state`` are merged into the instance dict, then a new
    ``tiktoken.Encoding`` is constructed from ``self._tiktoken_config``.

    Args:
        state: mapping of attribute names to values to restore.
    """
    vars(self).update(state)
    # NOTE(review): `_tiktoken_config` is expected to be available on the
    # instance after the update above (e.g. carried in `state`) -- confirm.
    encoding_kwargs = self._tiktoken_config
    self.tokenizer = tiktoken.Encoding(**encoding_kwargs)
|
149 |
+
|
150 |
@property
def vocab_size(self):
    """Number of entries in the vocabulary, read from `self.tokenizer.n_vocab`."""
    encoding = self.tokenizer
    return encoding.n_vocab
|