File size: 428 Bytes
0b32ad6
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
from s3prl.dataio.encoder.tokenizer import CharacterTokenizer, default_phoneme_tokenizer


def test_tokenizer():
    """Check that the character tokenizer round-trips a sample string.

    The sample text is encoded and then decoded again; the encoded form
    must be a ``list`` and the decoded text must equal the input.
    """
    sample = "HELLO WORLD"

    char_tok = CharacterTokenizer()
    # The phoneme tokenizer is only instantiated here; nothing is encoded or
    # decoded with it. NOTE(review): presumably a construction smoke check --
    # confirm whether phoneme round-trip assertions were intended as well.
    _ = default_phoneme_tokenizer()

    encoded = char_tok.encode(sample)
    decoded = char_tok.decode(encoded)

    assert isinstance(encoded, list)
    assert sample == decoded