# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import re
import os
from typing import List, Pattern, Union
from phonemizer.utils import list2str, str2list
from phonemizer.backend import EspeakBackend
from phonemizer.backend.espeak.language_switch import LanguageSwitch
from phonemizer.backend.espeak.words_mismatch import WordMismatch
from phonemizer.punctuation import Punctuation
from phonemizer.separator import Separator


class TextTokenizer:
    """Phonemize Text."""

    def __init__(
        self,
        language="en-us",
        backend="espeak",
        separator=Separator(word="|_|", syllable="-", phone="|"),
        preserve_punctuation=True,
        with_stress: bool = False,
        tie: Union[bool, str] = False,
        language_switch: LanguageSwitch = "remove-flags",
        words_mismatch: WordMismatch = "ignore",
    ) -> None:
        self.preserve_punctuation_marks = ",.?!;:'…"
        self.backend = EspeakBackend(
            language,
            punctuation_marks=self.preserve_punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            with_stress=with_stress,
            tie=tie,
            language_switch=language_switch,
            words_mismatch=words_mismatch,
        )

        self.separator = separator

    # convert chinese punctuation to english punctuation
    def convert_chinese_punctuation(self, text: str) -> str:
        text = text.replace("，", ",")
        text = text.replace("。", ".")
        text = text.replace("！", "!")
        text = text.replace("？", "?")
        text = text.replace("；", ";")
        text = text.replace("：", ":")
        text = text.replace("、", ",")
        text = text.replace("‘", "'")
        text = text.replace("’", "'")
        text = text.replace("⋯", "…")
        text = text.replace("···", "…")
        text = text.replace("・・・", "…")
        text = text.replace("...", "…")
        return text

    def __call__(self, text, strip=True) -> List[str]:

        text_type = type(text)
        normalized_text = []
        for line in str2list(text):
            line = self.convert_chinese_punctuation(line.strip())
            line = re.sub(r"[^\w\s_,\.\?!;:\'…]", "", line)
            line = re.sub(r"\s*([,\.\?!;:\'…])\s*", r"\1", line)
            line = re.sub(r"\s+", " ", line)
            normalized_text.append(line)
        # print("Normalized test: ", normalized_text[0])
        phonemized = self.backend.phonemize(
            normalized_text, separator=self.separator, strip=strip, njobs=1
        )
        if text_type == str:
            phonemized = re.sub(r"([,\.\?!;:\'…])", r"|\1|", list2str(phonemized))
            phonemized = re.sub(r"\|+", "|", phonemized)
            phonemized = phonemized.rstrip("|")
        else:
            for i in range(len(phonemized)):
                phonemized[i] = re.sub(r"([,\.\?!;:\'…])", r"|\1|", phonemized[i])
                phonemized[i] = re.sub(r"\|+", "|", phonemized[i])
                phonemized[i] = phonemized[i].rstrip("|")
        return phonemized