File size: 428 Bytes
0b32ad6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
from s3prl.dataio.encoder.tokenizer import CharacterTokenizer, default_phoneme_tokenizer
def test_tokenizer():
    """Check that the character tokenizer round-trips a plain string.

    The text is encoded, decoded again, and the test asserts that the
    encoded form is a ``list`` and that the decoded text equals the input.
    The default phoneme tokenizer is also constructed, but this test does
    not encode or decode anything with it.
    """
    tokenizer = CharacterTokenizer()
    # Built only to confirm construction succeeds; the instance is unused.
    _phoneme_tokenizer = default_phoneme_tokenizer()

    original = "HELLO WORLD"
    encoded = tokenizer.encode(original)
    decoded = tokenizer.decode(encoded)

    assert isinstance(encoded, list)
    assert original == decoded
|