Spaces:
Build error
Build error
| import re | |
| import jieba | |
| from pypinyin import pinyin, Style | |
| from data_gen.tts.data_gen_utils import PUNCS | |
| from data_gen.tts.txt_processors.base_text_processor import BaseTxtProcessor | |
| from utils.text_norm import NSWNormalizer | |
class TxtProcessor(BaseTxtProcessor):
    """Text front-end that cleans a sentence and splits it into initials/finals.

    ``process`` is the entry point; it returns a flat token list delimited by
    ``"|"`` together with the cleaned text the tokens were derived from.
    """

    # Translation table used by ``str.translate``: full-width punctuation and
    # digits -> their ASCII counterparts.  Full-width forms (U+FF01..U+FF5E)
    # sit exactly 0xFEE0 above the matching ASCII code point, so the table is
    # derived from the ASCII targets instead of being spelled out as literals
    # (an ASCII -> ASCII table would be a no-op).
    table = {ord(ch) + 0xFEE0: ord(ch) for ch in ":,!?()%#@&1234567890"}
    # CJK punctuation that lives outside the full-width block.
    table.update({
        0x3002: ord("."),  # ideographic full stop
        0x3010: ord("["),  # left black lenticular bracket
        0x3011: ord("]"),  # right black lenticular bracket
    })

    @staticmethod
    def preprocess_text(text: str) -> str:
        """Normalize *text* into the character set consumed by ``process``.

        Steps, in order: map full-width characters to ASCII, run
        ``NSWNormalizer`` (keeping punctuation), drop quotes and parentheses,
        turn hyphen runs into a space, drop every character that is not a
        space, an ASCII letter, a character in U+4E00..U+9FFF or a member of
        ``PUNCS``, collapse repeated punctuation, remove all whitespace, and
        finally replace each run of ASCII letters with a single ``"$"``.

        Args:
            text: Raw input sentence.

        Returns:
            The cleaned sentence, containing no whitespace.
        """
        text = text.translate(TxtProcessor.table)
        text = NSWNormalizer(text).normalize(remove_punc=False)
        text = re.sub("[\'\"()]+", "", text)
        text = re.sub("[-]+", " ", text)
        # Whitelist filter: anything outside the allowed set is deleted.
        text = re.sub(f"[^ A-Za-z\u4e00-\u9fff{PUNCS}]", "", text)
        text = re.sub(f"([{PUNCS}])+", r"\1", text)  # !! -> !
        # The padding added here is removed again by the whitespace strip
        # below; the step is kept so the pipeline order stays unchanged.
        text = re.sub(f"([{PUNCS}])", r" \1 ", text)
        text = re.sub(r"\s+", r"", text)
        # Each run of Latin letters is replaced by the placeholder "$".
        text = re.sub(r"[A-Za-z]+", r"$", text)
        return text

    @classmethod
    def process(cls, txt, pre_align_args):
        """Convert *txt* into a ``"|"``-delimited list of initials and finals.

        Args:
            txt: Raw input sentence; it is cleaned with ``preprocess_text``.
            pre_align_args: Mapping that must contain ``'use_tone'``.  When it
                is truthy the finals carry a tone digit, otherwise plain
                finals are used.

        Returns:
            A ``(phs, txt)`` tuple: ``phs`` starts with ``"|"`` and has one
            ``"|"`` after every item returned by ``pinyin``; ``txt`` is the
            cleaned sentence.
        """
        txt = cls.preprocess_text(txt)
        # https://blog.csdn.net/zhoulei124/article/details/89055403
        initials = pinyin(txt, style=Style.INITIALS)
        finals_plain = pinyin(txt, style=Style.FINALS)
        finals_tone3 = pinyin(txt, style=Style.FINALS_TONE3)

        if pre_align_args['use_tone']:
            # When FINALS_TONE3 added no digit (its output equals the plain
            # final) a '5' is appended -- presumably the neutral-tone marker;
            # confirm against the phone set used downstream.
            finals = [
                [toned[0] + '5'] if toned[0] == plain[0] else toned
                for plain, toned in zip(finals_plain, finals_tone3)
            ]
        else:
            finals = finals_plain

        assert len(initials) == len(finals)

        phs = ["|"]
        for initial, final, plain in zip(initials, finals, finals_plain):
            if initial[0] == plain[0]:
                # Initial and plain final are identical, so the item was not
                # split into two parts; emit it once.
                phs += [initial[0], "|"]
            else:
                phs += [initial[0], final[0], "|"]
        return phs, txt