Lolalb committed on
Commit
20853f2
·
verified ·
1 Parent(s): dcba81b

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.py +2 -2
tokenizer.py CHANGED
@@ -253,8 +253,8 @@ class ProteinTokenizer(PreTrainedTokenizerFast):
253
  # Add special tokens
254
  if add_special_tokens:
255
  encoded_inputs["input_ids"] = [[self.bos_token_id] + seq + [self.eos_token_id] for seq in encoded_inputs["input_ids"]]
256
- encoded_inputs["attention_mask"] = [1, 1] + encoded_inputs["attention_mask"]
257
- encoded_inputs["special_tokens_mask"] = [1] + encoded_inputs["special_tokens_mask"] + [1]
258
 
259
  # Truncate
260
  if truncation:
 
253
  # Add special tokens
254
  if add_special_tokens:
255
  encoded_inputs["input_ids"] = [[self.bos_token_id] + seq + [self.eos_token_id] for seq in encoded_inputs["input_ids"]]
256
+ encoded_inputs["attention_mask"] = [[1, 1] + seq for seq in encoded_inputs["attention_mask"]]
257
+ encoded_inputs["special_tokens_mask"] = [[1] + seq + [1] for seq in encoded_inputs["special_tokens_mask"]]
258
 
259
  # Truncate
260
  if truncation: