Spaces:
Runtime error
Runtime error
File size: 878 Bytes
06a851e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
from transformers import CamembertTokenizer
def get_tokenizer(model_name='camembert-base'):
    """Load a pretrained CamemBERT tokenizer.

    Args:
        model_name: Identifier handed to ``CamembertTokenizer.from_pretrained``.
            Defaults to ``'camembert-base'``.

    Returns:
        The object returned by ``CamembertTokenizer.from_pretrained``.
    """
    return CamembertTokenizer.from_pretrained(model_name)
def tokenize_encode_corpus(tokenizer, descriptions, max_len):
    """Encode a corpus of descriptions with the given tokenizer.

    The tokenizer is invoked once on the whole corpus, asking for special
    tokens, padding to ``max_length``, ``'longest_first'`` truncation and an
    attention mask.

    Args:
        tokenizer: Callable accepting the keyword arguments used below
            (presumably a Hugging Face tokenizer -- confirm against callers).
        descriptions: Text input forwarded as the ``text`` argument.
        max_len: Value forwarded as ``max_length`` for padding/truncation.

    Returns:
        Whatever the tokenizer call returns, unmodified.
    """
    return tokenizer(
        text=descriptions,
        add_special_tokens=True,
        padding='max_length',
        truncation='longest_first',
        max_length=max_len,
        return_attention_mask=True,
    )
def extract_inputs_masks(encoded_corpus):
    """Pull the input ids and attention mask out of an encoded corpus.

    Args:
        encoded_corpus: Mapping-like object expected to expose the
            ``'input_ids'`` and ``'attention_mask'`` keys.

    Returns:
        A ``(input_ids, attention_mask)`` tuple, or ``None`` when either key
        is missing (the available keys are printed in that case).
    """
    try:
        input_ids = encoded_corpus['input_ids']
        attention_mask = encoded_corpus['attention_mask']
    except KeyError:
        # Only a missing key is an expected failure; anything else (wrong
        # type, interrupt, ...) propagates with its real cause instead of
        # being swallowed by a bare ``except``.
        print('Available keys are = ', encoded_corpus.keys())
        return None
    else:
        return input_ids, attention_mask
|