Spaces:
Build error
Build error
| import os | |
| import torch | |
| from tokenizers import Tokenizer | |
| from TTS.tts.utils.text.cleaners import english_cleaners | |
# Directory containing this module (symlinks resolved); the default vocab path
# is expressed relative to it.
_MODULE_DIR = os.path.dirname(os.path.realpath(__file__))

# Default tokenizer definition used by VoiceBpeTokenizer when no vocab is given.
DEFAULT_VOCAB_FILE = os.path.join(_MODULE_DIR, "../../utils/assets/tortoise/tokenizer.json")
class VoiceBpeTokenizer:
    """Text tokenizer built on a ``tokenizers.Tokenizer`` instance.

    Before encoding, text is passed through ``english_cleaners`` and every
    literal space is replaced by the ``[SPACE]`` marker. ``decode`` reverses
    that substitution and strips the ``[STOP]`` and ``[UNK]`` markers.
    """

    def __init__(self, vocab_file=DEFAULT_VOCAB_FILE, vocab_str=None):
        """Load the underlying tokenizer.

        Args:
            vocab_file: Path passed to ``Tokenizer.from_file``. Only used when
                ``vocab_str`` is ``None``.
            vocab_str: Serialized tokenizer passed to ``Tokenizer.from_str``.
                Takes precedence over ``vocab_file``.

        If both arguments are ``None``, ``self.tokenizer`` is left as ``None``.
        """
        self.tokenizer = None
        # vocab_str wins: skip the file load entirely so that a caller who
        # supplies an in-memory vocab does not depend on the default file
        # being present on disk.
        if vocab_str is not None:
            self.tokenizer = Tokenizer.from_str(vocab_str)
        elif vocab_file is not None:
            self.tokenizer = Tokenizer.from_file(vocab_file)

    def preprocess_text(self, txt):
        """Return ``txt`` after running it through ``english_cleaners``."""
        return english_cleaners(txt)

    def encode(self, txt):
        """Clean ``txt``, mark spaces with ``[SPACE]`` and return the token ids."""
        txt = self.preprocess_text(txt)
        txt = txt.replace(" ", "[SPACE]")
        return self.tokenizer.encode(txt).ids

    def decode(self, seq):
        """Turn a sequence of token ids back into text.

        Args:
            seq: Token ids, either a ``torch.Tensor`` or a sequence accepted by
                ``Tokenizer.decode``.

        Returns:
            The decoded string with ``[SPACE]`` markers turned back into
            spaces and ``[STOP]`` / ``[UNK]`` markers removed.
        """
        if isinstance(seq, torch.Tensor):
            # Hand the tokenizer a plain list of Python ints, the input type
            # its decode() API documents.
            seq = seq.cpu().tolist()
        # Special tokens are kept so the [SPACE] markers survive decoding; the
        # spaces the decoder puts between tokens are dropped.
        txt = self.tokenizer.decode(seq, skip_special_tokens=False).replace(" ", "")
        txt = txt.replace("[SPACE]", " ")
        txt = txt.replace("[STOP]", "")
        txt = txt.replace("[UNK]", "")
        return txt