Lolalb committed on
Commit
dcba81b
·
verified ·
1 Parent(s): 222770c

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.py +2 -0
tokenizer.py CHANGED
@@ -253,6 +253,8 @@ class ProteinTokenizer(PreTrainedTokenizerFast):
253
  # Add special tokens
254
  if add_special_tokens:
255
  encoded_inputs["input_ids"] = [[self.bos_token_id] + seq + [self.eos_token_id] for seq in encoded_inputs["input_ids"]]
 
 
256
 
257
  # Truncate
258
  if truncation:
 
253
  # Add special tokens
254
  if add_special_tokens:
255
  encoded_inputs["input_ids"] = [[self.bos_token_id] + seq + [self.eos_token_id] for seq in encoded_inputs["input_ids"]]
256
+ encoded_inputs["attention_mask"] = [1, 1] + encoded_inputs["attention_mask"]
257
+ encoded_inputs["special_tokens_mask"] = [1] + encoded_inputs["special_tokens_mask"] + [1]
258
 
259
  # Truncate
260
  if truncation: