Spaces:
Runtime error
Runtime error
import abc
from typing import List, Tuple

from TTS.tts.utils.text.punctuation import Punctuation
class BasePhonemizer(abc.ABC):
    """Base phonemizer class

    Phonemization follows the following steps:
        1. Preprocessing:
            - remove empty lines
            - remove punctuation
            - keep track of punctuation marks

        2. Phonemization:
            - convert text to phonemes

        3. Postprocessing:
            - join phonemes
            - restore punctuation marks

    Concrete backends must implement ``name``, ``is_available``, ``version``,
    ``supported_languages`` and ``_phonemize``.

    Args:
        language (str):
            Language used by the phonemizer.

        punctuations (List[str]):
            List of punctuation marks to be preserved.

        keep_puncs (bool):
            Whether to preserve punctuation marks or not.

    Raises:
        RuntimeError: If the backend is not available on the system or does
            not support ``language``.
    """

    def __init__(self, language, punctuations=Punctuation.default_puncs(), keep_puncs=False):
        # ensure the backend is installed on the system
        if not self.is_available():
            raise RuntimeError("{} not installed on your system".format(self.name()))  # pragma: nocover

        # ensure the backend supports the requested language
        self._language = self._init_language(language)

        # setup punctuation processing
        self._keep_puncs = keep_puncs
        self._punctuator = Punctuation(punctuations)

    def _init_language(self, language):
        """Language initialization

        Validates ``language`` against the backend and returns it unchanged.
        This method may be overloaded in child classes (see Segments backend)

        Raises:
            RuntimeError: If ``language`` is not supported by the backend.
        """
        if not self.is_supported_language(language):
            raise RuntimeError(f'language "{language}" is not supported by the ' f"{self.name()} backend")
        return language

    @property
    def language(self):
        """The language code configured to be used for phonemization"""
        # Exposed as a property: `print_logs` reads it as a plain attribute.
        return self._language

    @staticmethod
    @abc.abstractmethod
    def name():
        """The name of the backend"""
        ...

    @classmethod
    @abc.abstractmethod
    def is_available(cls):
        """Returns True if the backend is installed, False otherwise"""
        ...

    @classmethod
    @abc.abstractmethod
    def version(cls):
        """Return the backend version as a tuple (major, minor, patch)"""
        ...

    @staticmethod
    @abc.abstractmethod
    def supported_languages():
        """Return a dict of language codes -> name supported by the backend"""
        ...

    def is_supported_language(self, language):
        """Returns True if `language` is supported by the backend"""
        return language in self.supported_languages()

    @abc.abstractmethod
    def _phonemize(self, text, separator):
        """The main phonemization method

        Called by `phonemize` once per preprocessed text chunk; must be
        implemented by each backend.
        """

    def _phonemize_preprocess(self, text) -> Tuple[List[str], List]:
        """Preprocess the text before phonemization

        1. strip leading and trailing whitespace
        2. remove punctuation (remembering it when ``keep_puncs`` is set)

        Override this if you need a different behaviour

        Returns:
            A pair ``(chunks, punctuations)``. When punctuation is not kept,
            ``chunks`` holds a single string and ``punctuations`` is empty.
        """
        text = text.strip()
        if self._keep_puncs:
            # a tuple (text, punctuation marks)
            return self._punctuator.strip_to_restore(text)
        return [self._punctuator.strip(text)], []

    def _phonemize_postprocess(self, phonemized, punctuations) -> str:
        """Postprocess the raw phonemized output

        Restores the punctuation marks recorded during preprocessing when
        ``keep_puncs`` is set; otherwise returns the first phonemized chunk.

        Override this if you need a different behaviour
        """
        if self._keep_puncs:
            return self._punctuator.restore(phonemized, punctuations)[0]
        return phonemized[0]

    def phonemize(self, text: str, separator="|", language: str = None) -> str:  # pylint: disable=unused-argument
        """Returns the `text` phonemized for the given language

        Args:
            text (str):
                Text to be phonemized.

            separator (str):
                string separator used between phonemes. Default to '|'.

            language (str):
                Unused by this base implementation; the language given at
                construction time is the one in effect.

        Returns:
            (str): Phonemized text
        """
        text, punctuations = self._phonemize_preprocess(text)
        # phonemize each punctuation-free chunk independently
        phonemized = [self._phonemize(t, separator) for t in text]
        return self._phonemize_postprocess(phonemized, punctuations)

    def print_logs(self, level: int = 0):
        """Print the configured language and backend name.

        Args:
            level (int): Number of tab characters used to indent each line.
        """
        indent = "\t" * level
        print(f"{indent}| > phoneme language: {self.language}")
        print(f"{indent}| > phoneme backend: {self.name()}")