jon-tow committed
Commit ee2a4ae · 1 Parent(s): 9ada655

feat: expose tiktoken tokenizer merge ranks and special tokens

Files changed (1)
  1. tokenization_arcade100k.py +3 -0

tokenization_arcade100k.py CHANGED
@@ -126,6 +126,9 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
         self.decoder.update({i: n for n, i in self.tokenizer._special_tokens.items()})
         self.eos_token = self.decoder[self.tokenizer.eot_token]
         self.pad_token = self.decoder[self.tokenizer.eot_token]
+        # Expose for convenience
+        self.mergeable_ranks = self.tokenizer._mergeable_ranks
+        self.special_tokens = self.tokenizer._special_tokens
 
     def __len__(self):
         return self.tokenizer.n_vocab
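
Usage note (not part of the commit): a minimal sketch of reading the newly exposed attributes after loading the tokenizer, assuming a repository that ships tokenization_arcade100k.py and is loaded with trust_remote_code=True; the repo id below is a placeholder.

# Minimal sketch. Assumptions: "<repo-with-arcade100k-tokenizer>" is a placeholder
# repo id, and loading with trust_remote_code=True executes tokenization_arcade100k.py.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "<repo-with-arcade100k-tokenizer>",
    trust_remote_code=True,
)

# BPE merge table: tiktoken's _mergeable_ranks, mapping token byte sequences
# to their merge rank, now exposed as a public attribute.
print(len(tok.mergeable_ranks))

# Special tokens: mapping of special-token strings (e.g. the EOT token) to ids.
print(tok.special_tokens)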