Princess7317 committed
Commit 3ae9fdb · verified · 1 Parent(s): 5e412c8

Upload 82 files

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +1 -0
  2. diffrhythm/.DS_Store +0 -0
  3. diffrhythm/config/defaults.ini +94 -0
  4. diffrhythm/config/diffrhythm-1b.json +13 -0
  5. diffrhythm/g2p/__pycache__/g2p_generation.cpython-310.pyc +0 -0
  6. diffrhythm/g2p/__pycache__/g2p_generation.cpython-311.pyc +0 -0
  7. diffrhythm/g2p/g2p/__init__.py +87 -0
  8. diffrhythm/g2p/g2p/__pycache__/__init__.cpython-310.pyc +0 -0
  9. diffrhythm/g2p/g2p/__pycache__/__init__.cpython-311.pyc +0 -0
  10. diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-310.pyc +0 -0
  11. diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-311.pyc +0 -0
  12. diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-310.pyc +0 -0
  13. diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-311.pyc +0 -0
  14. diffrhythm/g2p/g2p/__pycache__/english.cpython-310.pyc +0 -0
  15. diffrhythm/g2p/g2p/__pycache__/english.cpython-311.pyc +0 -0
  16. diffrhythm/g2p/g2p/__pycache__/french.cpython-310.pyc +0 -0
  17. diffrhythm/g2p/g2p/__pycache__/french.cpython-311.pyc +0 -0
  18. diffrhythm/g2p/g2p/__pycache__/german.cpython-310.pyc +0 -0
  19. diffrhythm/g2p/g2p/__pycache__/german.cpython-311.pyc +0 -0
  20. diffrhythm/g2p/g2p/__pycache__/japanese.cpython-310.pyc +0 -0
  21. diffrhythm/g2p/g2p/__pycache__/japanese.cpython-311.pyc +0 -0
  22. diffrhythm/g2p/g2p/__pycache__/korean.cpython-310.pyc +0 -0
  23. diffrhythm/g2p/g2p/__pycache__/korean.cpython-311.pyc +0 -0
  24. diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-310.pyc +0 -0
  25. diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-311.pyc +0 -0
  26. diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-310.pyc +0 -0
  27. diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-311.pyc +0 -0
  28. diffrhythm/g2p/g2p/chinese_model_g2p.py +213 -0
  29. diffrhythm/g2p/g2p/cleaners.py +31 -0
  30. diffrhythm/g2p/g2p/english.py +202 -0
  31. diffrhythm/g2p/g2p/french.py +149 -0
  32. diffrhythm/g2p/g2p/german.py +94 -0
  33. diffrhythm/g2p/g2p/japanese.py +816 -0
  34. diffrhythm/g2p/g2p/korean.py +81 -0
  35. diffrhythm/g2p/g2p/mandarin.py +600 -0
  36. diffrhythm/g2p/g2p/text_tokenizers.py +85 -0
  37. diffrhythm/g2p/g2p/vocab.json +372 -0
  38. diffrhythm/g2p/g2p_generation.py +133 -0
  39. diffrhythm/g2p/sources/bpmf_2_pinyin.txt +41 -0
  40. diffrhythm/g2p/sources/chinese_lexicon.txt +3 -0
  41. diffrhythm/g2p/sources/g2p_chinese_model/config.json +819 -0
  42. diffrhythm/g2p/sources/g2p_chinese_model/poly_bert_model.onnx +3 -0
  43. diffrhythm/g2p/sources/g2p_chinese_model/polychar.txt +159 -0
  44. diffrhythm/g2p/sources/g2p_chinese_model/polydict.json +393 -0
  45. diffrhythm/g2p/sources/g2p_chinese_model/polydict_r.json +393 -0
  46. diffrhythm/g2p/sources/g2p_chinese_model/vocab.txt +0 -0
  47. diffrhythm/g2p/sources/pinyin_2_bpmf.txt +429 -0
  48. diffrhythm/g2p/utils/__pycache__/front_utils.cpython-310.pyc +0 -0
  49. diffrhythm/g2p/utils/__pycache__/front_utils.cpython-311.pyc +0 -0
  50. diffrhythm/g2p/utils/__pycache__/g2p.cpython-310.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ diffrhythm/g2p/sources/chinese_lexicon.txt filter=lfs diff=lfs merge=lfs -text
diffrhythm/.DS_Store ADDED
Binary file (6.15 kB).
 
diffrhythm/config/defaults.ini ADDED
@@ -0,0 +1,94 @@
+
+ [DEFAULTS]
+
+ #name of the run
+ exp_name = F5
+
+ # the batch size
+ batch_size = 8
+
+ # the chunk size
+ max_frames = 3000
+ min_frames = 10
+
+ # number of CPU workers for the DataLoader
+ num_workers = 4
+
+ # the random seed
+ seed = 42
+
+ # Batches for gradient accumulation
+ accum_batches = 1
+
+ # Number of steps between checkpoints
+ checkpoint_every = 10000
+
+ # trainer checkpoint file to restart training from
+ ckpt_path = ''
+
+ # model checkpoint file to start a new training run from
+ pretrained_ckpt_path = ''
+
+ # Checkpoint path for the pretransform model if needed
+ pretransform_ckpt_path = ''
+
+ # configuration model specifying model hyperparameters
+ model_config = ''
+
+ # configuration for datasets
+ dataset_config = ''
+
+ # directory to save the checkpoints in
+ save_dir = ''
+
+ # grad norm
+ max_grad_norm = 1.0
+
+ # grad accu
+ grad_accumulation_steps = 1
+
+ # lr
+ learning_rate = 7.5e-5
+
+ # epoch
+ epochs = 110
+
+ # warmup steps
+ num_warmup_updates = 2000
+
+ # save checkpoint per steps
+ save_per_updates = 5000
+
+ # save last checkpoint per steps
+ last_per_steps = 5000
+
+ prompt_path = "/mnt/sfs/music/lance/style-lance-full|/mnt/sfs/music/lance/style-lance-cnen-music-second"
+ lrc_path = "/mnt/sfs/music/lance/lrc-lance-emb-full|/mnt/sfs/music/lance/lrc-lance-cnen-second"
+ latent_path = "/mnt/sfs/music/lance/latent-lance|/mnt/sfs/music/lance/latent-lance-cnen-music-second-1|/mnt/sfs/music/lance/latent-lance-cnen-music-second-2"
+
+ audio_drop_prob = 0.3
+ cond_drop_prob = 0.0
+ style_drop_prob = 0.1
+ lrc_drop_prob = 0.1
+
+ align_lyrics = 0
+ lyrics_slice = 0
+ parse_lyrics = 1
+ skip_empty_lyrics = 0
+ lyrics_shift = -1
+
+ use_style_prompt = 1
+
+ tokenizer_type = gpt2
+
+ reset_lr = 0
+
+ resumable_with_seed = 666
+
+ downsample_rate = 2048
+
+ grad_ckpt = 0
+
+ dataset_path = "/mnt/sfs/music/hkchen/workspace/F5-TTS-HW/filelists/music123latent_asred_bpmstyle_cnen_pure1"
+
+ pure_prob = 0.0
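
How defaults.ini is consumed is not part of this commit; as a rough sketch (assuming a plain configparser load, which may differ from the actual trainer's argument handling), the [DEFAULTS] section and the pipe-separated path lists can be read like this:

# Minimal sketch: read defaults.ini with configparser (assumed loader) and
# split the "|"-separated dataset roots used by prompt_path / lrc_path / latent_path.
import configparser

parser = configparser.ConfigParser()
parser.read("diffrhythm/config/defaults.ini")
cfg = parser["DEFAULTS"]

batch_size = cfg.getint("batch_size")          # 8
learning_rate = cfg.getfloat("learning_rate")  # 7.5e-5
latent_dirs = cfg.get("latent_path").strip('"').split("|")
print(batch_size, learning_rate, len(latent_dirs))  # 8 7.5e-05 3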
diffrhythm/config/diffrhythm-1b.json ADDED
@@ -0,0 +1,13 @@
+ {
+     "model_type": "diffrhythm",
+     "model": {
+         "dim": 2048,
+         "depth": 16,
+         "heads": 32,
+         "ff_mult": 4,
+         "text_dim": 512,
+         "conv_layers": 4,
+         "mel_dim": 64,
+         "text_num_embeds": 363
+     }
+ }
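
A hedged sketch of how this JSON might be wired into a model constructor; DiffRhythmModel is a placeholder name, since the model class itself is not included in this view:

# Sketch only: load diffrhythm-1b.json and hand the hyperparameters to a model
# constructor. DiffRhythmModel is a placeholder; the real class is not in this commit.
import json

with open("diffrhythm/config/diffrhythm-1b.json") as f:
    config = json.load(f)

assert config["model_type"] == "diffrhythm"
hparams = config["model"]  # dim=2048, depth=16, heads=32, ff_mult=4, ...
# model = DiffRhythmModel(**hparams)  # placeholder constructor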
diffrhythm/g2p/__pycache__/g2p_generation.cpython-310.pyc ADDED
Binary file (2.61 kB).

diffrhythm/g2p/__pycache__/g2p_generation.cpython-311.pyc ADDED
Binary file (4.85 kB).
 
diffrhythm/g2p/g2p/__init__.py ADDED
@@ -0,0 +1,87 @@
+ # Copyright (c) 2024 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from diffrhythm.g2p.g2p import cleaners
+ from tokenizers import Tokenizer
+ from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer
+ import LangSegment
+ import json
+ import re
+
+
+ class PhonemeBpeTokenizer:
+
+     def __init__(self, vacab_path="./diffrhythm/g2p/g2p/vocab.json"):
+         self.lang2backend = {
+             "zh": "cmn",
+             "ja": "ja",
+             "en": "en-us",
+             "fr": "fr-fr",
+             "ko": "ko",
+             "de": "de",
+         }
+         self.text_tokenizers = {}
+         self.int_text_tokenizers()
+
+         with open(vacab_path, "r") as f:
+             json_data = f.read()
+         data = json.loads(json_data)
+         self.vocab = data["vocab"]
+         LangSegment.setfilters(["en", "zh", "ja", "ko", "fr", "de"])
+
+     def int_text_tokenizers(self):
+         for key, value in self.lang2backend.items():
+             self.text_tokenizers[key] = TextTokenizer(language=value)
+
+     def tokenize(self, text, sentence, language):
+
+         # 1. convert text to phoneme
+         phonemes = []
+         if language == "auto":
+             seglist = LangSegment.getTexts(text)
+             tmp_ph = []
+             for seg in seglist:
+                 tmp_ph.append(
+                     self._clean_text(
+                         seg["text"], sentence, seg["lang"], ["cjekfd_cleaners"]
+                     )
+                 )
+             phonemes = "|_|".join(tmp_ph)
+         else:
+             phonemes = self._clean_text(text, sentence, language, ["cjekfd_cleaners"])
+         # print('clean text: ', phonemes)
+
+         # 2. tokenize phonemes
+         phoneme_tokens = self.phoneme2token(phonemes)
+         # print('encode: ', phoneme_tokens)
+
+         # # 3. decode tokens [optional]
+         # decoded_text = self.tokenizer.decode(phoneme_tokens)
+         # print('decoded: ', decoded_text)
+
+         return phonemes, phoneme_tokens
+
+     def _clean_text(self, text, sentence, language, cleaner_names):
+         for name in cleaner_names:
+             cleaner = getattr(cleaners, name)
+             if not cleaner:
+                 raise Exception("Unknown cleaner: %s" % name)
+             text = cleaner(text, sentence, language, self.text_tokenizers)
+         return text
+
+     def phoneme2token(self, phonemes):
+         tokens = []
+         if isinstance(phonemes, list):
+             for phone in phonemes:
+                 phone = phone.split("\t")[0]
+                 phonemes_split = phone.split("|")
+                 tokens.append(
+                     [self.vocab[p] for p in phonemes_split if p in self.vocab]
+                 )
+         else:
+             phonemes = phonemes.split("\t")[0]
+             phonemes_split = phonemes.split("|")
+             tokens = [self.vocab[p] for p in phonemes_split if p in self.vocab]
+         return tokens
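
Usage sketch for the tokenizer defined above (assumes espeak-ng plus the LangSegment and tokenizers packages are installed; the example line and printed values are illustrative):

# Sketch: run the G2P front end on one line of text.
from diffrhythm.g2p.g2p import PhonemeBpeTokenizer

tokenizer = PhonemeBpeTokenizer(vacab_path="./diffrhythm/g2p/g2p/vocab.json")
# `sentence` is only used by the Chinese cleaner; for English it can simply repeat `text`.
text = "Hello world"
phonemes, tokens = tokenizer.tokenize(text, text, "en")
print(phonemes)  # "|"-separated IPA phoneme string
print(tokens)    # vocab ids for the phonemes found in vocab.json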
diffrhythm/g2p/g2p/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (2.62 kB).

diffrhythm/g2p/g2p/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (4.6 kB).

diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-310.pyc ADDED
Binary file (6.87 kB).

diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-311.pyc ADDED
Binary file (13.1 kB).

diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-310.pyc ADDED
Binary file (950 Bytes).

diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-311.pyc ADDED
Binary file (1.5 kB).

diffrhythm/g2p/g2p/__pycache__/english.cpython-310.pyc ADDED
Binary file (4.93 kB).

diffrhythm/g2p/g2p/__pycache__/english.cpython-311.pyc ADDED
Binary file (9.27 kB).

diffrhythm/g2p/g2p/__pycache__/french.cpython-310.pyc ADDED
Binary file (3.66 kB).

diffrhythm/g2p/g2p/__pycache__/french.cpython-311.pyc ADDED
Binary file (5.74 kB).

diffrhythm/g2p/g2p/__pycache__/german.cpython-310.pyc ADDED
Binary file (2.45 kB).

diffrhythm/g2p/g2p/__pycache__/german.cpython-311.pyc ADDED
Binary file (4.03 kB).

diffrhythm/g2p/g2p/__pycache__/japanese.cpython-310.pyc ADDED
Binary file (17.7 kB).

diffrhythm/g2p/g2p/__pycache__/japanese.cpython-311.pyc ADDED
Binary file (28.1 kB).

diffrhythm/g2p/g2p/__pycache__/korean.cpython-310.pyc ADDED
Binary file (1.94 kB).

diffrhythm/g2p/g2p/__pycache__/korean.cpython-311.pyc ADDED
Binary file (2.91 kB).

diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-310.pyc ADDED
Binary file (12.6 kB).

diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-311.pyc ADDED
Binary file (25.2 kB).

diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-310.pyc ADDED
Binary file (2.67 kB).

diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-311.pyc ADDED
Binary file (4.78 kB).
 
diffrhythm/g2p/g2p/chinese_model_g2p.py ADDED
@@ -0,0 +1,213 @@
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import os
7
+ import numpy as np
8
+ import torch
9
+ from torch.utils.data import DataLoader
10
+ import json
11
+ from transformers import BertTokenizer
12
+ from torch.utils.data import Dataset
13
+ from transformers.models.bert.modeling_bert import *
14
+ import torch
15
+ import torch.nn.functional as F
16
+ from onnxruntime import InferenceSession, GraphOptimizationLevel, SessionOptions
17
+
18
+
19
+ class PolyDataset(Dataset):
20
+ def __init__(self, words, labels, word_pad_idx=0, label_pad_idx=-1):
21
+ self.dataset = self.preprocess(words, labels)
22
+ self.word_pad_idx = word_pad_idx
23
+ self.label_pad_idx = label_pad_idx
24
+
25
+ def preprocess(self, origin_sentences, origin_labels):
26
+ """
27
+ Maps tokens and tags to their indices and stores them in the dict data.
28
+ examples:
29
+ word:['[CLS]', '浙', '商', '银', '行', '企', '业', '信', '贷', '部']
30
+ sentence:([101, 3851, 1555, 7213, 6121, 821, 689, 928, 6587, 6956],
31
+ array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
32
+ label:[3, 13, 13, 13, 0, 0, 0, 0, 0]
33
+ """
34
+ data = []
35
+ labels = []
36
+ sentences = []
37
+ # tokenize
38
+ for line in origin_sentences:
39
+ # replace each token by its index
40
+ # we can not use encode_plus because our sentences are aligned to labels in list type
41
+ words = []
42
+ word_lens = []
43
+ for token in line:
44
+ words.append(token)
45
+ word_lens.append(1)
46
+ token_start_idxs = 1 + np.cumsum([0] + word_lens[:-1])
47
+ sentences.append(((words, token_start_idxs), 0))
48
+ ###
49
+ for tag in origin_labels:
50
+ labels.append(tag)
51
+
52
+ for sentence, label in zip(sentences, labels):
53
+ data.append((sentence, label))
54
+ return data
55
+
56
+ def __getitem__(self, idx):
57
+ """sample data to get batch"""
58
+ word = self.dataset[idx][0]
59
+ label = self.dataset[idx][1]
60
+ return [word, label]
61
+
62
+ def __len__(self):
63
+ """get dataset size"""
64
+ return len(self.dataset)
65
+
66
+ def collate_fn(self, batch):
67
+
68
+ sentences = [x[0][0] for x in batch]
69
+ ori_sents = [x[0][1] for x in batch]
70
+ labels = [x[1] for x in batch]
71
+ batch_len = len(sentences)
72
+
73
+ # compute length of longest sentence in batch
74
+ max_len = max([len(s[0]) for s in sentences])
75
+ max_label_len = 0
76
+ batch_data = np.ones((batch_len, max_len))
77
+ batch_label_starts = []
78
+
79
+ # padding and aligning
80
+ for j in range(batch_len):
81
+ cur_len = len(sentences[j][0])
82
+ batch_data[j][:cur_len] = sentences[j][0]
83
+ label_start_idx = sentences[j][-1]
84
+ label_starts = np.zeros(max_len)
85
+ label_starts[[idx for idx in label_start_idx if idx < max_len]] = 1
86
+ batch_label_starts.append(label_starts)
87
+ max_label_len = max(int(sum(label_starts)), max_label_len)
88
+
89
+ # padding label
90
+ batch_labels = self.label_pad_idx * np.ones((batch_len, max_label_len))
91
+ batch_pmasks = self.label_pad_idx * np.ones((batch_len, max_label_len))
92
+ for j in range(batch_len):
93
+ cur_tags_len = len(labels[j])
94
+ batch_labels[j][:cur_tags_len] = labels[j]
95
+ batch_pmasks[j][:cur_tags_len] = [
96
+ 1 if item > 0 else 0 for item in labels[j]
97
+ ]
98
+
99
+ # convert data to torch LongTensors
100
+ batch_data = torch.tensor(batch_data, dtype=torch.long)
101
+ batch_label_starts = torch.tensor(batch_label_starts, dtype=torch.long)
102
+ batch_labels = torch.tensor(batch_labels, dtype=torch.long)
103
+ batch_pmasks = torch.tensor(batch_pmasks, dtype=torch.long)
104
+ return [batch_data, batch_label_starts, batch_labels, batch_pmasks, ori_sents]
105
+
106
+
107
+ class BertPolyPredict:
108
+ def __init__(self, bert_model, jsonr_file, json_file):
109
+ self.tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=True)
110
+ with open(jsonr_file, "r", encoding="utf8") as fp:
111
+ self.pron_dict = json.load(fp)
112
+ with open(json_file, "r", encoding="utf8") as fp:
113
+ self.pron_dict_id_2_pinyin = json.load(fp)
114
+ self.num_polyphone = len(self.pron_dict)
115
+ self.device = "cpu"
116
+ self.polydataset = PolyDataset
117
+ options = SessionOptions() # initialize session options
118
+ options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
119
+ print(os.path.join(bert_model, "poly_bert_model.onnx"))
120
+ self.session = InferenceSession(
121
+ os.path.join(bert_model, "poly_bert_model.onnx"),
122
+ sess_options=options,
123
+ providers=[
124
+ "CoreMLExecutionProvider", # Replace CUDA with CoreML
125
+ "CPUExecutionProvider",
126
+ ], # CPUExecutionProvider #CUDAExecutionProvider
127
+ )
128
+ # self.session.set_providers(['CUDAExecutionProvider', "CPUExecutionProvider"], [ {'device_id': 0}])
129
+
130
+ # disable session.run() fallback mechanism, it prevents for a reset of the execution provider
131
+ self.session.disable_fallback()
132
+
133
+ def predict_process(self, txt_list):
134
+ word_test, label_test, texts_test = self.get_examples_po(txt_list)
135
+ data = self.polydataset(word_test, label_test)
136
+ predict_loader = DataLoader(
137
+ data, batch_size=1, shuffle=False, collate_fn=data.collate_fn
138
+ )
139
+ pred_tags = self.predict_onnx(predict_loader)
140
+ return pred_tags
141
+
142
+ def predict_onnx(self, dev_loader):
143
+ pred_tags = []
144
+ with torch.no_grad():
145
+ for idx, batch_samples in enumerate(dev_loader):
146
+ # [batch_data, batch_label_starts, batch_labels, batch_pmasks, ori_sents]
147
+ batch_data, batch_label_starts, batch_labels, batch_pmasks, _ = (
148
+ batch_samples
149
+ )
150
+ # shift tensors to GPU if available
151
+ batch_data = batch_data.to(self.device)
152
+ batch_label_starts = batch_label_starts.to(self.device)
153
+ batch_labels = batch_labels.to(self.device)
154
+ batch_pmasks = batch_pmasks.to(self.device)
155
+ batch_data = np.asarray(batch_data, dtype=np.float32)
156
+ batch_pmasks = np.asarray(batch_pmasks, dtype=np.float32)
157
+ # batch_output = self.session.run(output_names=['outputs'], input_feed={"input_ids":batch_data, "input_pmasks": batch_pmasks})[0][0]
158
+ batch_output = self.session.run(
159
+ output_names=["outputs"], input_feed={"input_ids": batch_data}
160
+ )[0]
161
+ label_masks = batch_pmasks == 1
162
+ batch_labels = batch_labels.to("cpu").numpy()
163
+ for i, indices in enumerate(np.argmax(batch_output, axis=2)):
164
+ for j, idx in enumerate(indices):
165
+ if label_masks[i][j]:
166
+ # pred_tag.append(idx)
167
+ pred_tags.append(self.pron_dict_id_2_pinyin[str(idx + 1)])
168
+ return pred_tags
169
+
170
+ def get_examples_po(self, text_list):
171
+
172
+ word_list = []
173
+ label_list = []
174
+ sentence_list = []
175
+ id = 0
176
+ for line in [text_list]:
177
+ sentence = line[0]
178
+ words = []
179
+ tokens = line[0]
180
+ index = line[-1]
181
+ front = index
182
+ back = len(tokens) - index - 1
183
+ labels = [0] * front + [1] + [0] * back
184
+ words = ["[CLS]"] + [item for item in sentence]
185
+ words = self.tokenizer.convert_tokens_to_ids(words)
186
+ word_list.append(words)
187
+ label_list.append(labels)
188
+ sentence_list.append(sentence)
189
+
190
+ id += 1
191
+ # mask_list.append(masks)
192
+ assert len(labels) + 1 == len(words), print(
193
+ (
194
+ poly,
195
+ sentence,
196
+ words,
197
+ labels,
198
+ sentence,
199
+ len(sentence),
200
+ len(words),
201
+ len(labels),
202
+ )
203
+ )
204
+ assert len(labels) + 1 == len(
205
+ words
206
+ ), "Number of labels does not match number of words"
207
+ assert len(labels) == len(
208
+ sentence
209
+ ), "Number of labels does not match number of sentences"
210
+ assert len(word_list) == len(
211
+ label_list
212
+ ), "Number of label sentences does not match number of word sentences"
213
+ return word_list, label_list, text_list
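
chinese_model_g2p.py wraps an ONNX polyphone classifier; a usage sketch follows. The model directory matches diffrhythm/g2p/sources/g2p_chinese_model/ from this commit, but which of polydict.json / polydict_r.json fills which constructor argument is a guess here — g2p_generation.py in the same commit shows the real wiring.

# Sketch: disambiguate one polyphonic character with the ONNX model added under
# diffrhythm/g2p/sources/g2p_chinese_model/. The dictionary-argument order below
# is an assumption (see g2p_generation.py for the actual call).
from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict

model_dir = "./diffrhythm/g2p/sources/g2p_chinese_model"
predictor = BertPolyPredict(
    model_dir,
    f"{model_dir}/polydict_r.json",  # assumed: pinyin -> id
    f"{model_dir}/polydict.json",    # assumed: id -> pinyin
)
chars = list("我们还没还钱")  # "还" is polyphonic
pred = predictor.predict_process([chars, 2])  # 2 = index of the character to resolve
print(pred)  # e.g. a one-element list with the predicted pinyin (illustrative)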
diffrhythm/g2p/g2p/cleaners.py ADDED
@@ -0,0 +1,31 @@
+ # Copyright (c) 2024 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import re
+ from diffrhythm.g2p.g2p.japanese import japanese_to_ipa
+ from diffrhythm.g2p.g2p.mandarin import chinese_to_ipa
+ from diffrhythm.g2p.g2p.english import english_to_ipa
+ from diffrhythm.g2p.g2p.french import french_to_ipa
+ from diffrhythm.g2p.g2p.korean import korean_to_ipa
+ from diffrhythm.g2p.g2p.german import german_to_ipa
+
+
+ def cjekfd_cleaners(text, sentence, language, text_tokenizers):
+
+     if language == "zh":
+         return chinese_to_ipa(text, sentence, text_tokenizers["zh"])
+     elif language == "ja":
+         return japanese_to_ipa(text, text_tokenizers["ja"])
+     elif language == "en":
+         return english_to_ipa(text, text_tokenizers["en"])
+     elif language == "fr":
+         return french_to_ipa(text, text_tokenizers["fr"])
+     elif language == "ko":
+         return korean_to_ipa(text, text_tokenizers["ko"])
+     elif language == "de":
+         return german_to_ipa(text, text_tokenizers["de"])
+     else:
+         raise Exception("Unknown language: %s" % language)
+     return None
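
cjekfd_cleaners is the language dispatch used by PhonemeBpeTokenizer._clean_text; it can also be called directly, e.g. (assuming an espeak-backed TextTokenizer for English):

# Sketch: call the dispatcher directly with a prebuilt tokenizer map.
from diffrhythm.g2p.g2p.cleaners import cjekfd_cleaners
from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer

text_tokenizers = {"en": TextTokenizer(language="en-us")}
ipa = cjekfd_cleaners("It costs $5.", "It costs $5.", "en", text_tokenizers)
print(ipa)  # "|"-separated IPA phonemes produced by english_to_ipa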
diffrhythm/g2p/g2p/english.py ADDED
@@ -0,0 +1,202 @@
+ # Copyright (c) 2024 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import re
+ from unidecode import unidecode
+ import inflect
+
+ """
+ Text clean time
+ """
+ _inflect = inflect.engine()
+ _comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
+ _decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
+ _percent_number_re = re.compile(r"([0-9\.\,]*[0-9]+%)")
+ _pounds_re = re.compile(r"£([0-9\,]*[0-9]+)")
+ _dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
+ _fraction_re = re.compile(r"([0-9]+)/([0-9]+)")
+ _ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
+ _number_re = re.compile(r"[0-9]+")
+
+ # List of (regular expression, replacement) pairs for abbreviations:
+ _abbreviations = [
+     (re.compile("\\b%s\\b" % x[0], re.IGNORECASE), x[1])
+     for x in [
+         ("mrs", "misess"),
+         ("mr", "mister"),
+         ("dr", "doctor"),
+         ("st", "saint"),
+         ("co", "company"),
+         ("jr", "junior"),
+         ("maj", "major"),
+         ("gen", "general"),
+         ("drs", "doctors"),
+         ("rev", "reverend"),
+         ("lt", "lieutenant"),
+         ("hon", "honorable"),
+         ("sgt", "sergeant"),
+         ("capt", "captain"),
+         ("esq", "esquire"),
+         ("ltd", "limited"),
+         ("col", "colonel"),
+         ("ft", "fort"),
+         ("etc", "et cetera"),
+         ("btw", "by the way"),
+     ]
+ ]
+
+ _special_map = [
+     ("t|ɹ", "tɹ"),
+     ("d|ɹ", "dɹ"),
+     ("t|s", "ts"),
+     ("d|z", "dz"),
+     ("ɪ|ɹ", "ɪɹ"),
+     ("ɐ", "ɚ"),
+     ("ᵻ", "ɪ"),
+     ("əl", "l"),
+     ("x", "k"),
+     ("ɬ", "l"),
+     ("ʔ", "t"),
+     ("n̩", "n"),
+     ("oː|ɹ", "oːɹ"),
+ ]
+
+
+ def expand_abbreviations(text):
+     for regex, replacement in _abbreviations:
+         text = re.sub(regex, replacement, text)
+     return text
+
+
+ def _remove_commas(m):
+     return m.group(1).replace(",", "")
+
+
+ def _expand_decimal_point(m):
+     return m.group(1).replace(".", " point ")
+
+
+ def _expand_percent(m):
+     return m.group(1).replace("%", " percent ")
+
+
+ def _expand_dollars(m):
+     match = m.group(1)
+     parts = match.split(".")
+     if len(parts) > 2:
+         return " " + match + " dollars "  # Unexpected format
+     dollars = int(parts[0]) if parts[0] else 0
+     cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
+     if dollars and cents:
+         dollar_unit = "dollar" if dollars == 1 else "dollars"
+         cent_unit = "cent" if cents == 1 else "cents"
+         return " %s %s, %s %s " % (dollars, dollar_unit, cents, cent_unit)
+     elif dollars:
+         dollar_unit = "dollar" if dollars == 1 else "dollars"
+         return " %s %s " % (dollars, dollar_unit)
+     elif cents:
+         cent_unit = "cent" if cents == 1 else "cents"
+         return " %s %s " % (cents, cent_unit)
+     else:
+         return " zero dollars "
+
+
+ def fraction_to_words(numerator, denominator):
+     if numerator == 1 and denominator == 2:
+         return " one half "
+     if numerator == 1 and denominator == 4:
+         return " one quarter "
+     if denominator == 2:
+         return " " + _inflect.number_to_words(numerator) + " halves "
+     if denominator == 4:
+         return " " + _inflect.number_to_words(numerator) + " quarters "
+     return (
+         " "
+         + _inflect.number_to_words(numerator)
+         + " "
+         + _inflect.ordinal(_inflect.number_to_words(denominator))
+         + " "
+     )
+
+
+ def _expand_fraction(m):
+     numerator = int(m.group(1))
+     denominator = int(m.group(2))
+     return fraction_to_words(numerator, denominator)
+
+
+ def _expand_ordinal(m):
+     return " " + _inflect.number_to_words(m.group(0)) + " "
+
+
+ def _expand_number(m):
+     num = int(m.group(0))
+     if num > 1000 and num < 3000:
+         if num == 2000:
+             return " two thousand "
+         elif num > 2000 and num < 2010:
+             return " two thousand " + _inflect.number_to_words(num % 100) + " "
+         elif num % 100 == 0:
+             return " " + _inflect.number_to_words(num // 100) + " hundred "
+         else:
+             return (
+                 " "
+                 + _inflect.number_to_words(num, andword="", zero="oh", group=2).replace(
+                     ", ", " "
+                 )
+                 + " "
+             )
+     else:
+         return " " + _inflect.number_to_words(num, andword="") + " "
+
+
+ # Normalize numbers pronunciation
+ def normalize_numbers(text):
+     text = re.sub(_comma_number_re, _remove_commas, text)
+     text = re.sub(_pounds_re, r"\1 pounds", text)
+     text = re.sub(_dollars_re, _expand_dollars, text)
+     text = re.sub(_fraction_re, _expand_fraction, text)
+     text = re.sub(_decimal_number_re, _expand_decimal_point, text)
+     text = re.sub(_percent_number_re, _expand_percent, text)
+     text = re.sub(_ordinal_re, _expand_ordinal, text)
+     text = re.sub(_number_re, _expand_number, text)
+     return text
+
+
+ def _english_to_ipa(text):
+     # text = unidecode(text).lower()
+     text = expand_abbreviations(text)
+     text = normalize_numbers(text)
+     return text
+
+
+ # special map
+ def special_map(text):
+     for regex, replacement in _special_map:
+         regex = regex.replace("|", "\|")
+         while re.search(r"(^|[_|]){}([_|]|$)".format(regex), text):
+             text = re.sub(
+                 r"(^|[_|]){}([_|]|$)".format(regex), r"\1{}\2".format(replacement), text
+             )
+     # text = re.sub(r'([,.!?])', r'|\1', text)
+     return text
+
+
+ # Add some special operation
+ def english_to_ipa(text, text_tokenizer):
+     if type(text) == str:
+         text = _english_to_ipa(text)
+     else:
+         text = [_english_to_ipa(t) for t in text]
+     phonemes = text_tokenizer(text)
+     if phonemes[-1] in "p⁼ʰmftnlkxʃs`ɹaoəɛɪeɑʊŋiuɥwæjː":
+         phonemes += "|_"
+     if type(text) == str:
+         return special_map(phonemes)
+     else:
+         result_ph = []
+         for phone in phonemes:
+             result_ph.append(special_map(phone))
+         return result_ph
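
The number and abbreviation rules above run before phonemization; an illustrative call (output shown approximately — the rules deliberately insert extra spaces around expanded numbers):

# Illustrative calls to the normalization helpers above.
from diffrhythm.g2p.g2p.english import expand_abbreviations, normalize_numbers

print(expand_abbreviations("Dr. Smith, etc."))
# -> "doctor. Smith, et cetera."
print(normalize_numbers("It cost $2.50 in 2007."))
# -> roughly "It cost two dollars, fifty cents in two thousand seven ."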
diffrhythm/g2p/g2p/french.py ADDED
@@ -0,0 +1,149 @@
+ # Copyright (c) 2024 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import re
+
+ """
+ Text clean time
+ """
+ # List of (regular expression, replacement) pairs for abbreviations in french:
+ _abbreviations = [
+     (re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1])
+     for x in [
+         ("M", "monsieur"),
+         ("Mlle", "mademoiselle"),
+         ("Mlles", "mesdemoiselles"),
+         ("Mme", "Madame"),
+         ("Mmes", "Mesdames"),
+         ("N.B", "nota bene"),
+         ("M", "monsieur"),
+         ("p.c.q", "parce que"),
+         ("Pr", "professeur"),
+         ("qqch", "quelque chose"),
+         ("rdv", "rendez-vous"),
+         ("max", "maximum"),
+         ("min", "minimum"),
+         ("no", "numéro"),
+         ("adr", "adresse"),
+         ("dr", "docteur"),
+         ("st", "saint"),
+         ("co", "companie"),
+         ("jr", "junior"),
+         ("sgt", "sergent"),
+         ("capt", "capitain"),
+         ("col", "colonel"),
+         ("av", "avenue"),
+         ("av. J.-C", "avant Jésus-Christ"),
+         ("apr. J.-C", "après Jésus-Christ"),
+         ("art", "article"),
+         ("boul", "boulevard"),
+         ("c.-à-d", "c’est-à-dire"),
+         ("etc", "et cetera"),
+         ("ex", "exemple"),
+         ("excl", "exclusivement"),
+         ("boul", "boulevard"),
+     ]
+ ] + [
+     (re.compile("\\b%s" % x[0]), x[1])
+     for x in [
+         ("Mlle", "mademoiselle"),
+         ("Mlles", "mesdemoiselles"),
+         ("Mme", "Madame"),
+         ("Mmes", "Mesdames"),
+     ]
+ ]
+
+ rep_map = {
+     "：": ",",
+     "；": ",",
+     "，": ",",
+     "。": ".",
+     "！": "!",
+     "？": "?",
+     "\n": ".",
+     "·": ",",
+     "、": ",",
+     "...": ".",
+     "…": ".",
+     "$": ".",
+     "“": "",
+     "”": "",
+     "‘": "",
+     "’": "",
+     "(": "",
+     ")": "",
+     "（": "",
+     "）": "",
+     "《": "",
+     "》": "",
+     "【": "",
+     "】": "",
+     "[": "",
+     "]": "",
+     "—": "",
+     "～": "-",
+     "~": "-",
+     "「": "",
+     "」": "",
+     "¿": "",
+     "¡": "",
+ }
+
+
+ def collapse_whitespace(text):
+     # Regular expression matching whitespace:
+     _whitespace_re = re.compile(r"\s+")
+     return re.sub(_whitespace_re, " ", text).strip()
+
+
+ def remove_punctuation_at_begin(text):
+     return re.sub(r"^[,.!?]+", "", text)
+
+
+ def remove_aux_symbols(text):
+     text = re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text)
+     return text
+
+
+ def replace_symbols(text):
+     text = text.replace(";", ",")
+     text = text.replace("-", " ")
+     text = text.replace(":", ",")
+     text = text.replace("&", " et ")
+     return text
+
+
+ def expand_abbreviations(text):
+     for regex, replacement in _abbreviations:
+         text = re.sub(regex, replacement, text)
+     return text
+
+
+ def replace_punctuation(text):
+     pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
+     replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
+     return replaced_text
+
+
+ def text_normalize(text):
+     text = expand_abbreviations(text)
+     text = replace_punctuation(text)
+     text = replace_symbols(text)
+     text = remove_aux_symbols(text)
+     text = remove_punctuation_at_begin(text)
+     text = collapse_whitespace(text)
+     text = re.sub(r"([^\.,!\?\-…])$", r"\1", text)
+     return text
+
+
+ def french_to_ipa(text, text_tokenizer):
+     if type(text) == str:
+         text = text_normalize(text)
+         phonemes = text_tokenizer(text)
+         return phonemes
+     else:
+         for i, t in enumerate(text):
+             text[i] = text_normalize(t)
+         return text_tokenizer(text)
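
A quick illustration of the French normalization above (expected output follows from the abbreviation and symbol rules; exact spacing may vary):

# Sketch: French normalization before phonemization.
from diffrhythm.g2p.g2p.french import text_normalize

print(text_normalize("M. Dupont & Mme Martin, rdv. boul. Saint-Michel !"))
# -> roughly "monsieur Dupont et Madame Martin, rendez vous boulevard Saint Michel !"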
diffrhythm/g2p/g2p/german.py ADDED
@@ -0,0 +1,94 @@
+ # Copyright (c) 2024 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import re
+
+ """
+ Text clean time
+ """
+ rep_map = {
+     "：": ",",
+     "；": ",",
+     "，": ",",
+     "。": ".",
+     "！": "!",
+     "？": "?",
+     "\n": ".",
+     "·": ",",
+     "、": ",",
+     "...": ".",
+     "…": ".",
+     "$": ".",
+     "“": "",
+     "”": "",
+     "‘": "",
+     "’": "",
+     "(": "",
+     ")": "",
+     "（": "",
+     "）": "",
+     "《": "",
+     "》": "",
+     "【": "",
+     "】": "",
+     "[": "",
+     "]": "",
+     "—": "",
+     "～": "-",
+     "~": "-",
+     "「": "",
+     "」": "",
+     "¿": "",
+     "¡": "",
+ }
+
+
+ def collapse_whitespace(text):
+     # Regular expression matching whitespace:
+     _whitespace_re = re.compile(r"\s+")
+     return re.sub(_whitespace_re, " ", text).strip()
+
+
+ def remove_punctuation_at_begin(text):
+     return re.sub(r"^[,.!?]+", "", text)
+
+
+ def remove_aux_symbols(text):
+     text = re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text)
+     return text
+
+
+ def replace_symbols(text):
+     text = text.replace(";", ",")
+     text = text.replace("-", " ")
+     text = text.replace(":", ",")
+     return text
+
+
+ def replace_punctuation(text):
+     pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
+     replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
+     return replaced_text
+
+
+ def text_normalize(text):
+     text = replace_punctuation(text)
+     text = replace_symbols(text)
+     text = remove_aux_symbols(text)
+     text = remove_punctuation_at_begin(text)
+     text = collapse_whitespace(text)
+     text = re.sub(r"([^\.,!\?\-…])$", r"\1", text)
+     return text
+
+
+ def german_to_ipa(text, text_tokenizer):
+     if type(text) == str:
+         text = text_normalize(text)
+         phonemes = text_tokenizer(text)
+         return phonemes
+     else:
+         for i, t in enumerate(text):
+             text[i] = text_normalize(t)
+         return text_tokenizer(text)
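
german_to_ipa follows the same normalize-then-phonemize pattern as the French module, minus abbreviation expansion; a minimal sketch assuming the espeak "de" backend is available:

# Sketch: the German path, mirroring french.py without abbreviation handling.
from diffrhythm.g2p.g2p.german import german_to_ipa
from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer

print(german_to_ipa("Guten Morgen, wie geht es dir?", TextTokenizer(language="de")))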
diffrhythm/g2p/g2p/japanese.py ADDED
@@ -0,0 +1,816 @@
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import io, re, os, sys, time, argparse, pdb, json
7
+ from io import StringIO
8
+ from typing import Optional
9
+ import numpy as np
10
+ import traceback
11
+ import pyopenjtalk
12
+ from pykakasi import kakasi
13
+
14
+ punctuation = [",", ".", "!", "?", ":", ";", "'", "…"]
15
+
16
+ jp_xphone2ipa = [
17
+ " a a",
18
+ " i i",
19
+ " u ɯ",
20
+ " e e",
21
+ " o o",
22
+ " a: aː",
23
+ " i: iː",
24
+ " u: ɯː",
25
+ " e: eː",
26
+ " o: oː",
27
+ " k k",
28
+ " s s",
29
+ " t t",
30
+ " n n",
31
+ " h ç",
32
+ " f ɸ",
33
+ " m m",
34
+ " y j",
35
+ " r ɾ",
36
+ " w ɰᵝ",
37
+ " N ɴ",
38
+ " g g",
39
+ " j d ʑ",
40
+ " z z",
41
+ " d d",
42
+ " b b",
43
+ " p p",
44
+ " q q",
45
+ " v v",
46
+ " : :",
47
+ " by b j",
48
+ " ch t ɕ",
49
+ " dy d e j",
50
+ " ty t e j",
51
+ " gy g j",
52
+ " gw g ɯ",
53
+ " hy ç j",
54
+ " ky k j",
55
+ " kw k ɯ",
56
+ " my m j",
57
+ " ny n j",
58
+ " py p j",
59
+ " ry ɾ j",
60
+ " sh ɕ",
61
+ " ts t s ɯ",
62
+ ]
63
+
64
+ _mora_list_minimum: list[tuple[str, Optional[str], str]] = [
65
+ ("ヴォ", "v", "o"),
66
+ ("ヴェ", "v", "e"),
67
+ ("ヴィ", "v", "i"),
68
+ ("ヴァ", "v", "a"),
69
+ ("ヴ", "v", "u"),
70
+ ("ン", None, "N"),
71
+ ("ワ", "w", "a"),
72
+ ("ロ", "r", "o"),
73
+ ("レ", "r", "e"),
74
+ ("ル", "r", "u"),
75
+ ("リョ", "ry", "o"),
76
+ ("リュ", "ry", "u"),
77
+ ("リャ", "ry", "a"),
78
+ ("リェ", "ry", "e"),
79
+ ("リ", "r", "i"),
80
+ ("ラ", "r", "a"),
81
+ ("ヨ", "y", "o"),
82
+ ("ユ", "y", "u"),
83
+ ("ヤ", "y", "a"),
84
+ ("モ", "m", "o"),
85
+ ("メ", "m", "e"),
86
+ ("ム", "m", "u"),
87
+ ("ミョ", "my", "o"),
88
+ ("ミュ", "my", "u"),
89
+ ("ミャ", "my", "a"),
90
+ ("ミェ", "my", "e"),
91
+ ("ミ", "m", "i"),
92
+ ("マ", "m", "a"),
93
+ ("ポ", "p", "o"),
94
+ ("ボ", "b", "o"),
95
+ ("ホ", "h", "o"),
96
+ ("ペ", "p", "e"),
97
+ ("ベ", "b", "e"),
98
+ ("ヘ", "h", "e"),
99
+ ("プ", "p", "u"),
100
+ ("ブ", "b", "u"),
101
+ ("フォ", "f", "o"),
102
+ ("フェ", "f", "e"),
103
+ ("フィ", "f", "i"),
104
+ ("ファ", "f", "a"),
105
+ ("フ", "f", "u"),
106
+ ("ピョ", "py", "o"),
107
+ ("ピュ", "py", "u"),
108
+ ("ピャ", "py", "a"),
109
+ ("ピェ", "py", "e"),
110
+ ("ピ", "p", "i"),
111
+ ("ビョ", "by", "o"),
112
+ ("ビュ", "by", "u"),
113
+ ("ビャ", "by", "a"),
114
+ ("ビェ", "by", "e"),
115
+ ("ビ", "b", "i"),
116
+ ("ヒョ", "hy", "o"),
117
+ ("ヒュ", "hy", "u"),
118
+ ("ヒャ", "hy", "a"),
119
+ ("ヒェ", "hy", "e"),
120
+ ("ヒ", "h", "i"),
121
+ ("パ", "p", "a"),
122
+ ("バ", "b", "a"),
123
+ ("ハ", "h", "a"),
124
+ ("ノ", "n", "o"),
125
+ ("ネ", "n", "e"),
126
+ ("ヌ", "n", "u"),
127
+ ("ニョ", "ny", "o"),
128
+ ("ニュ", "ny", "u"),
129
+ ("ニャ", "ny", "a"),
130
+ ("ニェ", "ny", "e"),
131
+ ("ニ", "n", "i"),
132
+ ("ナ", "n", "a"),
133
+ ("ドゥ", "d", "u"),
134
+ ("ド", "d", "o"),
135
+ ("トゥ", "t", "u"),
136
+ ("ト", "t", "o"),
137
+ ("デョ", "dy", "o"),
138
+ ("デュ", "dy", "u"),
139
+ ("デャ", "dy", "a"),
140
+ # ("デェ", "dy", "e"),
141
+ ("ディ", "d", "i"),
142
+ ("デ", "d", "e"),
143
+ ("テョ", "ty", "o"),
144
+ ("テュ", "ty", "u"),
145
+ ("テャ", "ty", "a"),
146
+ ("ティ", "t", "i"),
147
+ ("テ", "t", "e"),
148
+ ("ツォ", "ts", "o"),
149
+ ("ツェ", "ts", "e"),
150
+ ("ツィ", "ts", "i"),
151
+ ("ツァ", "ts", "a"),
152
+ ("ツ", "ts", "u"),
153
+ ("ッ", None, "q"), # 「cl」から「q」に変更
154
+ ("チョ", "ch", "o"),
155
+ ("チュ", "ch", "u"),
156
+ ("チャ", "ch", "a"),
157
+ ("チェ", "ch", "e"),
158
+ ("チ", "ch", "i"),
159
+ ("ダ", "d", "a"),
160
+ ("タ", "t", "a"),
161
+ ("ゾ", "z", "o"),
162
+ ("ソ", "s", "o"),
163
+ ("ゼ", "z", "e"),
164
+ ("セ", "s", "e"),
165
+ ("ズィ", "z", "i"),
166
+ ("ズ", "z", "u"),
167
+ ("スィ", "s", "i"),
168
+ ("ス", "s", "u"),
169
+ ("ジョ", "j", "o"),
170
+ ("ジュ", "j", "u"),
171
+ ("ジャ", "j", "a"),
172
+ ("ジェ", "j", "e"),
173
+ ("ジ", "j", "i"),
174
+ ("ショ", "sh", "o"),
175
+ ("シュ", "sh", "u"),
176
+ ("シャ", "sh", "a"),
177
+ ("シェ", "sh", "e"),
178
+ ("シ", "sh", "i"),
179
+ ("ザ", "z", "a"),
180
+ ("サ", "s", "a"),
181
+ ("ゴ", "g", "o"),
182
+ ("コ", "k", "o"),
183
+ ("ゲ", "g", "e"),
184
+ ("ケ", "k", "e"),
185
+ ("グヮ", "gw", "a"),
186
+ ("グ", "g", "u"),
187
+ ("クヮ", "kw", "a"),
188
+ ("ク", "k", "u"),
189
+ ("ギョ", "gy", "o"),
190
+ ("ギュ", "gy", "u"),
191
+ ("ギャ", "gy", "a"),
192
+ ("ギェ", "gy", "e"),
193
+ ("ギ", "g", "i"),
194
+ ("キョ", "ky", "o"),
195
+ ("キュ", "ky", "u"),
196
+ ("キャ", "ky", "a"),
197
+ ("キェ", "ky", "e"),
198
+ ("キ", "k", "i"),
199
+ ("ガ", "g", "a"),
200
+ ("カ", "k", "a"),
201
+ ("オ", None, "o"),
202
+ ("エ", None, "e"),
203
+ ("ウォ", "w", "o"),
204
+ ("ウェ", "w", "e"),
205
+ ("ウィ", "w", "i"),
206
+ ("ウ", None, "u"),
207
+ ("イェ", "y", "e"),
208
+ ("イ", None, "i"),
209
+ ("ア", None, "a"),
210
+ ]
211
+
212
+ _mora_list_additional: list[tuple[str, Optional[str], str]] = [
213
+ ("ヴョ", "by", "o"),
214
+ ("ヴュ", "by", "u"),
215
+ ("ヴャ", "by", "a"),
216
+ ("ヲ", None, "o"),
217
+ ("ヱ", None, "e"),
218
+ ("ヰ", None, "i"),
219
+ ("ヮ", "w", "a"),
220
+ ("ョ", "y", "o"),
221
+ ("ュ", "y", "u"),
222
+ ("ヅ", "z", "u"),
223
+ ("ヂ", "j", "i"),
224
+ ("ヶ", "k", "e"),
225
+ ("ャ", "y", "a"),
226
+ ("ォ", None, "o"),
227
+ ("ェ", None, "e"),
228
+ ("ゥ", None, "u"),
229
+ ("ィ", None, "i"),
230
+ ("ァ", None, "a"),
231
+ ]
232
+
233
+ # 例: "vo" -> "ヴォ", "a" -> "ア"
234
+ mora_phonemes_to_mora_kata: dict[str, str] = {
235
+ (consonant or "") + vowel: kana for [kana, consonant, vowel] in _mora_list_minimum
236
+ }
237
+
238
+ # 例: "ヴォ" -> ("v", "o"), "ア" -> (None, "a")
239
+ mora_kata_to_mora_phonemes: dict[str, tuple[Optional[str], str]] = {
240
+ kana: (consonant, vowel)
241
+ for [kana, consonant, vowel] in _mora_list_minimum + _mora_list_additional
242
+ }
243
+
244
+
245
+ # 正規化で記号を変換するための辞書
246
+ rep_map = {
247
+ ":": ":",
248
+ ";": ";",
249
+ ",": ",",
250
+ "。": ".",
251
+ "!": "!",
252
+ "?": "?",
253
+ "\n": ".",
254
+ ".": ".",
255
+ "⋯": "…",
256
+ "···": "…",
257
+ "・・・": "…",
258
+ "·": ",",
259
+ "・": ",",
260
+ "•": ",",
261
+ "、": ",",
262
+ "$": ".",
263
+ # "“": "'",
264
+ # "”": "'",
265
+ # '"': "'",
266
+ "‘": "'",
267
+ "’": "'",
268
+ # "(": "'",
269
+ # ")": "'",
270
+ # "(": "'",
271
+ # ")": "'",
272
+ # "《": "'",
273
+ # "》": "'",
274
+ # "【": "'",
275
+ # "】": "'",
276
+ # "[": "'",
277
+ # "]": "'",
278
+ # "——": "-",
279
+ # "−": "-",
280
+ # "-": "-",
281
+ # "『": "'",
282
+ # "』": "'",
283
+ # "〈": "'",
284
+ # "〉": "'",
285
+ # "«": "'",
286
+ # "»": "'",
287
+ # # "~": "-", # これは長音記号「ー」として扱うよう変更
288
+ # # "~": "-", # これは長音記号「ー」として扱うよう変更
289
+ # "「": "'",
290
+ # "」": "'",
291
+ }
292
+
293
+
294
+ def _numeric_feature_by_regex(regex, s):
295
+ match = re.search(regex, s)
296
+ if match is None:
297
+ return -50
298
+ return int(match.group(1))
299
+
300
+
301
+ def replace_punctuation(text: str) -> str:
302
+ """句読点等を「.」「,」「!」「?」「'」「-」に正規化し、OpenJTalkで読みが取得できるもののみ残す:
303
+ 漢字・平仮名・カタカナ、アルファベット、ギリシャ文字
304
+ """
305
+ pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
306
+ # print("before: ", text)
307
+ # 句読点を辞書で置換
308
+ replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
309
+
310
+ replaced_text = re.sub(
311
+ # ↓ ひらがな、カタカナ、漢字
312
+ r"[^\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u3400-\u4DBF\u3005"
313
+ # ↓ 半角アルファベット(大文字と小文字)
314
+ + r"\u0041-\u005A\u0061-\u007A"
315
+ # ↓ 全角アルファベット(大文字と小文字)
316
+ + r"\uFF21-\uFF3A\uFF41-\uFF5A"
317
+ # ↓ ギリシャ文字
318
+ + r"\u0370-\u03FF\u1F00-\u1FFF"
319
+ # ↓ "!", "?", "…", ",", ".", "'", "-", 但し`…`はすでに`...`に変換されている
320
+ + "".join(punctuation) + r"]+",
321
+ # 上述以外の文字を削除
322
+ "",
323
+ replaced_text,
324
+ )
325
+ # print("after: ", replaced_text)
326
+ return replaced_text
327
+
328
+
329
+ def fix_phone_tone(phone_tone_list: list[tuple[str, int]]) -> list[tuple[str, int]]:
330
+ """
331
+ `phone_tone_list`のtone(アクセントの値)を0か1の範囲に修正する。
332
+ 例: [(a, 0), (i, -1), (u, -1)] → [(a, 1), (i, 0), (u, 0)]
333
+ """
334
+ tone_values = set(tone for _, tone in phone_tone_list)
335
+ if len(tone_values) == 1:
336
+ assert tone_values == {0}, tone_values
337
+ return phone_tone_list
338
+ elif len(tone_values) == 2:
339
+ if tone_values == {0, 1}:
340
+ return phone_tone_list
341
+ elif tone_values == {-1, 0}:
342
+ return [
343
+ (letter, 0 if tone == -1 else 1) for letter, tone in phone_tone_list
344
+ ]
345
+ else:
346
+ raise ValueError(f"Unexpected tone values: {tone_values}")
347
+ else:
348
+ raise ValueError(f"Unexpected tone values: {tone_values}")
349
+
350
+
351
+ def fix_phone_tone_wplen(phone_tone_list, word_phone_length_list):
352
+ phones = []
353
+ tones = []
354
+ w_p_len = []
355
+ p_len = len(phone_tone_list)
356
+ idx = 0
357
+ w_idx = 0
358
+ while idx < p_len:
359
+ offset = 0
360
+ if phone_tone_list[idx] == "▁":
361
+ w_p_len.append(w_idx + 1)
362
+
363
+ curr_w_p_len = word_phone_length_list[w_idx]
364
+ for i in range(curr_w_p_len):
365
+ p, t = phone_tone_list[idx]
366
+ if p == ":" and len(phones) > 0:
367
+ if phones[-1][-1] != ":":
368
+ phones[-1] += ":"
369
+ offset -= 1
370
+ else:
371
+ phones.append(p)
372
+ tones.append(str(t))
373
+ idx += 1
374
+ if idx >= p_len:
375
+ break
376
+ w_p_len.append(curr_w_p_len + offset)
377
+ w_idx += 1
378
+ # print(w_p_len)
379
+ return phones, tones, w_p_len
380
+
381
+
382
+ def g2phone_tone_wo_punct(prosodies) -> list[tuple[str, int]]:
383
+ """
384
+ テキストに対して、音素とアクセント(0か1)のペアのリストを返す。
385
+ ただし「!」「.」「?」等の非音素記号(punctuation)は全て消える(ポーズ記号も残さない)。
386
+ 非音素記号を含める処理は`align_tones()`で行われる。
387
+ また「っ」は「cl」でなく「q」に変換される(「ん」は「N」のまま)。
388
+ 例: "こんにちは、世界ー。。元気?!" →
389
+ [('k', 0), ('o', 0), ('N', 1), ('n', 1), ('i', 1), ('ch', 1), ('i', 1), ('w', 1), ('a', 1), ('s', 1), ('e', 1), ('k', 0), ('a', 0), ('i', 0), ('i', 0), ('g', 1), ('e', 1), ('N', 0), ('k', 0), ('i', 0)]
390
+ """
391
+ result: list[tuple[str, int]] = []
392
+ current_phrase: list[tuple[str, int]] = []
393
+ current_tone = 0
394
+ last_accent = ""
395
+ for i, letter in enumerate(prosodies):
396
+ # 特殊記号の処理
397
+
398
+ # 文頭記号、無視する
399
+ if letter == "^":
400
+ assert i == 0, "Unexpected ^"
401
+ # アクセント句の終わりに来る記号
402
+ elif letter in ("$", "?", "_", "#"):
403
+ # 保持しているフレーズを、アクセント数値を0-1に修正し結果に追加
404
+ result.extend(fix_phone_tone(current_phrase))
405
+ # 末尾に来る終了記号、無視(文中の疑問文は`_`になる)
406
+ if letter in ("$", "?"):
407
+ assert i == len(prosodies) - 1, f"Unexpected {letter}"
408
+ # あとは"_"(ポーズ)と"#"(アクセント句の境界)のみ
409
+ # これらは残さず、次のアクセント句に備える。
410
+
411
+ current_phrase = []
412
+ # 0を基準点にしてそこから上昇・下降する(負の場合は上の`fix_phone_tone`で直る)
413
+ current_tone = 0
414
+ last_accent = ""
415
+ # アクセント上昇記号
416
+ elif letter == "[":
417
+ if last_accent != letter:
418
+ current_tone = current_tone + 1
419
+ last_accent = letter
420
+ # アクセント下降記号
421
+ elif letter == "]":
422
+ if last_accent != letter:
423
+ current_tone = current_tone - 1
424
+ last_accent = letter
425
+ # それ以外は通常の音素
426
+ else:
427
+ if letter == "cl": # 「っ」の処理
428
+ letter = "q"
429
+ current_phrase.append((letter, current_tone))
430
+ return result
431
+
432
+
433
+ def handle_long(sep_phonemes: list[list[str]]) -> list[list[str]]:
434
+ for i in range(len(sep_phonemes)):
435
+ if sep_phonemes[i][0] == "ー":
436
+ # sep_phonemes[i][0] = sep_phonemes[i - 1][-1]
437
+ sep_phonemes[i][0] = ":"
438
+ if "ー" in sep_phonemes[i]:
439
+ for j in range(len(sep_phonemes[i])):
440
+ if sep_phonemes[i][j] == "ー":
441
+ # sep_phonemes[i][j] = sep_phonemes[i][j - 1][-1]
442
+ sep_phonemes[i][j] = ":"
443
+ return sep_phonemes
444
+
445
+
446
+ def handle_long_word(sep_phonemes: list[list[str]]) -> list[list[str]]:
447
+ res = []
448
+ for i in range(len(sep_phonemes)):
449
+ if sep_phonemes[i][0] == "ー":
450
+ sep_phonemes[i][0] = sep_phonemes[i - 1][-1]
451
+ # sep_phonemes[i][0] = ':'
452
+ if "ー" in sep_phonemes[i]:
453
+ for j in range(len(sep_phonemes[i])):
454
+ if sep_phonemes[i][j] == "ー":
455
+ sep_phonemes[i][j] = sep_phonemes[i][j - 1][-1]
456
+ # sep_phonemes[i][j] = ':'
457
+ res.append(sep_phonemes[i])
458
+ res.append("▁")
459
+ return res
460
+
461
+
462
+ def align_tones(
463
+ phones_with_punct: list[str], phone_tone_list: list[tuple[str, int]]
464
+ ) -> list[tuple[str, int]]:
465
+ """
466
+ 例:
467
+ …私は、、そう思う。
468
+ phones_with_punct:
469
+ [".", ".", ".", "w", "a", "t", "a", "sh", "i", "w", "a", ",", ",", "s", "o", "o", "o", "m", "o", "u", "."]
470
+ phone_tone_list:
471
+ [("w", 0), ("a", 0), ("t", 1), ("a", 1), ("sh", 1), ("i", 1), ("w", 1), ("a", 1), ("s", 0), ("o", 0), ("o", 1), ("o", 1), ("m", 1), ("o", 1), ("u", 0))]
472
+ Return:
473
+ [(".", 0), (".", 0), (".", 0), ("w", 0), ("a", 0), ("t", 1), ("a", 1), ("sh", 1), ("i", 1), ("w", 1), ("a", 1), (",", 0), (",", 0), ("s", 0), ("o", 0), ("o", 1), ("o", 1), ("m", 1), ("o", 1), ("u", 0), (".", 0)]
474
+ """
475
+ result: list[tuple[str, int]] = []
476
+ tone_index = 0
477
+ for phone in phones_with_punct:
478
+ if tone_index >= len(phone_tone_list):
479
+ # 余ったpunctuationがある場合 → (punctuation, 0)を追加
480
+ result.append((phone, 0))
481
+ elif phone == phone_tone_list[tone_index][0]:
482
+ # phone_tone_listの現在の音素と一致する場合 → toneをそこから取得、(phone, tone)を追加
483
+ result.append((phone, phone_tone_list[tone_index][1]))
484
+ # 探すindexを1つ進める
485
+ tone_index += 1
486
+ elif phone in punctuation or phone == "▁":
487
+ # phoneがpunctuationの場合 → (phone, 0)を追加
488
+ result.append((phone, 0))
489
+ else:
490
+ print(f"phones: {phones_with_punct}")
491
+ print(f"phone_tone_list: {phone_tone_list}")
492
+ print(f"result: {result}")
493
+ print(f"tone_index: {tone_index}")
494
+ print(f"phone: {phone}")
495
+ raise ValueError(f"Unexpected phone: {phone}")
496
+ return result
497
+
498
+
499
+ def kata2phoneme_list(text: str) -> list[str]:
500
+ """
501
+ 原則カタカナの`text`を受け取り、それをそのままいじらずに音素記号のリストに変換。
502
+ 注意点:
503
+ - punctuationが来た場合(punctuationが1文字の場合がありうる)、処理せず1文字のリストを返す
504
+ - 冒頭に続く「ー」はそのまま「ー」のままにする(`handle_long()`で処理される)
505
+ - 文中の「ー」は前の音素記号の最後の音素記号に変換される。
506
+ 例:
507
+ `ーーソーナノカーー` → ["ー", "ー", "s", "o", "o", "n", "a", "n", "o", "k", "a", "a", "a"]
508
+ `?` → ["?"]
509
+ """
510
+ if text in punctuation:
511
+ return [text]
512
+ # `text`がカタカナ(`ー`含む)のみからなるかどうかをチェック
513
+ if re.fullmatch(r"[\u30A0-\u30FF]+", text) is None:
514
+ raise ValueError(f"Input must be katakana only: {text}")
515
+ sorted_keys = sorted(mora_kata_to_mora_phonemes.keys(), key=len, reverse=True)
516
+ pattern = "|".join(map(re.escape, sorted_keys))
517
+
518
+ def mora2phonemes(mora: str) -> str:
519
+ cosonant, vowel = mora_kata_to_mora_phonemes[mora]
520
+ if cosonant is None:
521
+ return f" {vowel}"
522
+ return f" {cosonant} {vowel}"
523
+
524
+ spaced_phonemes = re.sub(pattern, lambda m: mora2phonemes(m.group()), text)
525
+
526
+ # 長音記号「ー」の処理
527
+ long_pattern = r"(\w)(ー*)"
528
+ long_replacement = lambda m: m.group(1) + (" " + m.group(1)) * len(m.group(2))
529
+ spaced_phonemes = re.sub(long_pattern, long_replacement, spaced_phonemes)
530
+ # spaced_phonemes += ' ▁'
531
+ return spaced_phonemes.strip().split(" ")
532
+
533
+
534
+ def frontend2phoneme(labels, drop_unvoiced_vowels=False):
535
+ N = len(labels)
536
+
537
+ phones = []
538
+ for n in range(N):
539
+ lab_curr = labels[n]
540
+ # print(lab_curr)
541
+ # current phoneme
542
+ p3 = re.search(r"\-(.*?)\+", lab_curr).group(1)
543
+
544
+ # deal unvoiced vowels as normal vowels
545
+ if drop_unvoiced_vowels and p3 in "AEIOU":
546
+ p3 = p3.lower()
547
+
548
+ # deal with sil at the beginning and the end of text
549
+ if p3 == "sil":
550
+ # assert n == 0 or n == N - 1
551
+ # if n == 0:
552
+ # phones.append("^")
553
+ # elif n == N - 1:
554
+ # # check question form or not
555
+ # e3 = _numeric_feature_by_regex(r"!(\d+)_", lab_curr)
556
+ # if e3 == 0:
557
+ # phones.append("$")
558
+ # elif e3 == 1:
559
+ # phones.append("?")
560
+ continue
561
+ elif p3 == "pau":
562
+ phones.append("_")
563
+ continue
564
+ else:
565
+ phones.append(p3)
566
+
567
+ # accent type and position info (forward or backward)
568
+ a1 = _numeric_feature_by_regex(r"/A:([0-9\-]+)\+", lab_curr)
569
+ a2 = _numeric_feature_by_regex(r"\+(\d+)\+", lab_curr)
570
+ a3 = _numeric_feature_by_regex(r"\+(\d+)/", lab_curr)
571
+
572
+ # number of mora in accent phrase
573
+ f1 = _numeric_feature_by_regex(r"/F:(\d+)_", lab_curr)
574
+
575
+ a2_next = _numeric_feature_by_regex(r"\+(\d+)\+", labels[n + 1])
576
+ # accent phrase border
577
+ # print(p3, a1, a2, a3, f1, a2_next, lab_curr)
578
+ if a3 == 1 and a2_next == 1 and p3 in "aeiouAEIOUNcl":
579
+ phones.append("#")
580
+ # pitch falling
581
+ elif a1 == 0 and a2_next == a2 + 1 and a2 != f1:
582
+ phones.append("]")
583
+ # pitch rising
584
+ elif a2 == 1 and a2_next == 2:
585
+ phones.append("[")
586
+
587
+ # phones = ' '.join(phones)
588
+ return phones
589
+
590
+
591
+ class JapanesePhoneConverter(object):
592
+ def __init__(self, lexicon_path=None, ipa_dict_path=None):
593
+ # lexicon_lines = open(lexicon_path, 'r', encoding='utf-8').readlines()
594
+ # self.lexicon = {}
595
+ # self.single_dict = {}
596
+ # self.double_dict = {}
597
+ # for curr_line in lexicon_lines:
598
+ # k,v = curr_line.strip().split('+',1)
599
+ # self.lexicon[k] = v
600
+ # if len(k) == 2:
601
+ # self.double_dict[k] = v
602
+ # elif len(k) == 1:
603
+ # self.single_dict[k] = v
604
+ self.ipa_dict = {}
605
+ for curr_line in jp_xphone2ipa:
606
+ k, v = curr_line.strip().split(" ", 1)
607
+ self.ipa_dict[k] = re.sub("\s", "", v)
608
+ # kakasi1 = kakasi()
609
+ # kakasi1.setMode("H","K")
610
+ # kakasi1.setMode("J","K")
611
+ # kakasi1.setMode("r","Hepburn")
612
+ self.japan_JH2K = kakasi()
613
+ self.table = {ord(f): ord(t) for f, t in zip("67", "_¯")}
614
+
615
+ def text2sep_kata(self, parsed) -> tuple[list[str], list[str]]:
616
+ """
617
+ `text_normalize`で正規化済みの`norm_text`を受け取り、それを単語分割し、
618
+ 分割された単語リストとその読み(カタカナor記号1文字)のリス���のタプルを返す。
619
+ 単語分割結果は、`g2p()`の`word2ph`で1文字あたりに割り振る音素記号の数を決めるために使う。
620
+ 例:
621
+ `私はそう思う!って感じ?` →
622
+ ["私", "は", "そう", "思う", "!", "って", "感じ", "?"], ["ワタシ", "ワ", "ソー", "オモウ", "!", "ッテ", "カンジ", "?"]
623
+ """
624
+ # parsed: OpenJTalkの解析結果
625
+ sep_text: list[str] = []
626
+ sep_kata: list[str] = []
627
+ fix_parsed = []
628
+ i = 0
629
+ while i <= len(parsed) - 1:
630
+ # word: 実際の単語の文字列
631
+ # yomi: その読み、但し無声化サインの`’`は除去
632
+ # print(parsed)
633
+ yomi = parsed[i]["pron"]
634
+ tmp_parsed = parsed[i]
635
+ if i != len(parsed) - 1 and parsed[i + 1]["string"] in [
636
+ "々",
637
+ "ゝ",
638
+ "ヽ",
639
+ "ゞ",
640
+ "ヾ",
641
+ "゛",
642
+ ]:
643
+ word = parsed[i]["string"] + parsed[i + 1]["string"]
644
+ i += 1
645
+ else:
646
+ word = parsed[i]["string"]
647
+ word, yomi = replace_punctuation(word), yomi.replace("’", "")
648
+ """
649
+ ここで`yomi`の取りうる値は以下の通りのはず。
650
+ - `word`が通常単語 → 通常の読み(カタカナ)
651
+ (カタカナからなり、長音記号も含みうる、`アー` 等)
652
+ - `word`が`ー` から始まる → `ーラー` や `ーーー` など
653
+ - `word`が句読点や空白等 → `、`
654
+ - `word`が`?` → `?`(全角になる)
655
+ 他にも`word`が読めないキリル文字アラビア文字等が来ると`、`になるが、正規化でこの場合は起きないはず。
656
+ また元のコードでは`yomi`が空白の場合の処理があったが、これは起きないはず。
657
+ 処理すべきは`yomi`が`、`の場合のみのはず。
658
+ """
659
+ assert yomi != "", f"Empty yomi: {word}"
660
+ if yomi == "、":
661
+ # wordは正規化されているので、`.`, `,`, `!`, `'`, `-`のいずれか
662
+ if word not in (
663
+ ".",
664
+ ",",
665
+ "!",
666
+ "'",
667
+ "-",
668
+ "?",
669
+ ":",
670
+ ";",
671
+ "…",
672
+ "",
673
+ ):
674
+ # ここはpyopenjtalkが読めない文字等のときに起こる
675
+ #print(
676
+ # "{}Cannot read:{}, yomi:{}, new_word:{};".format(
677
+ # parsed, word, yomi, self.japan_JH2K.convert(word)[0]["kana"]
678
+ # )
679
+ #)
680
+ # raise ValueError(word)
681
+ word = self.japan_JH2K.convert(word)[0]["kana"]
682
+ # print(word, self.japan_JH2K.convert(word)[0]['kana'], kata2phoneme_list(self.japan_JH2K.convert(word)[0]['kana']))
683
+ tmp_parsed["pron"] = word
684
+ # yomi = "-"
685
+ # word = ','
686
+ # yomiは元の記号のままに変更
687
+ # else:
688
+ # parsed[i]['pron'] = parsed[i]["string"]
689
+ yomi = word
690
+ elif yomi == "?":
691
+ assert word == "?", f"yomi `?` comes from: {word}"
692
+ yomi = "?"
693
+ if word == "":
694
+ i += 1
695
+ continue
696
+ sep_text.append(word)
697
+ sep_kata.append(yomi)
698
+ # print(word, yomi, parts)
699
+ fix_parsed.append(tmp_parsed)
700
+ i += 1
701
+ # print(sep_text, sep_kata)
702
+ return sep_text, sep_kata, fix_parsed
703
+
704
+ def getSentencePhone(self, sentence, blank_mode=True, phoneme_mode=False):
705
+ # print("origin:", sentence)
706
+ words = []
707
+ words_phone_len = []
708
+ short_char_flag = False
709
+ output_duration_flag = []
710
+ output_before_sil_flag = []
711
+ normed_text = []
712
+ sentence = sentence.strip().strip("'")
713
+ sentence = re.sub(r"\s+", "", sentence)
714
+ output_res = []
715
+ failed_words = []
716
+ last_long_pause = 4
717
+ last_word = None
718
+ frontend_text = pyopenjtalk.run_frontend(sentence)
719
+ # print("frontend_text: ", frontend_text)
720
+ try:
721
+ frontend_text = pyopenjtalk.estimate_accent(frontend_text)
722
+ except:
723
+ pass
724
+ # print("estimate_accent: ", frontend_text)
725
+ # sep_text: 単語単位の単語のリスト
726
+ # sep_kata: 単語単位の単語のカタカナ読みのリスト
727
+ sep_text, sep_kata, frontend_text = self.text2sep_kata(frontend_text)
728
+ # print("sep_text: ", sep_text)
729
+ # print("sep_kata: ", sep_kata)
730
+ # print("frontend_text: ", frontend_text)
731
+ # sep_phonemes: 各単語ご���の音素のリストのリスト
732
+ sep_phonemes = handle_long_word([kata2phoneme_list(i) for i in sep_kata])
733
+ # print("sep_phonemes: ", sep_phonemes)
734
+
735
+ pron_text = [x["pron"].strip().replace("’", "") for x in frontend_text]
736
+ # pdb.set_trace()
737
+ prosodys = pyopenjtalk.make_label(frontend_text)
738
+ prosodys = frontend2phoneme(prosodys, drop_unvoiced_vowels=True)
739
+ # print("prosodys: ", ' '.join(prosodys))
740
+ # print("pron_text: ", pron_text)
741
+ normed_text = [x["string"].strip() for x in frontend_text]
742
+ # list of (phoneme, accent) tuples with all punctuation removed
743
+ phone_tone_list_wo_punct = g2phone_tone_wo_punct(prosodys)
744
+ # print("phone_tone_list_wo_punct: ", phone_tone_list_wo_punct)
745
+
746
+ # phone_w_punct: phoneme sequence built by concatenating sep_phonemes, with punctuation kept as-is
747
+ phone_w_punct: list[str] = []
748
+ w_p_len = []
749
+ for i in sep_phonemes:
750
+ phone_w_punct += i
751
+ w_p_len.append(len(i))
752
+ phone_w_punct = phone_w_punct[:-1]
753
+ # use the punctuation-free accent info to build accent info that includes punctuation
754
+ # print("phone_w_punct: ", phone_w_punct)
755
+ # print("phone_tone_list_wo_punct: ", phone_tone_list_wo_punct)
756
+ phone_tone_list = align_tones(phone_w_punct, phone_tone_list_wo_punct)
757
+
758
+ jp_item = {}
759
+ jp_p = ""
760
+ jp_t = ""
761
+ # mye rye pye bye nye
762
+ # je she
763
+ # print(phone_tone_list)
764
+ for p, t in phone_tone_list:
765
+ if p in self.ipa_dict:
766
+ curr_p = self.ipa_dict[p]
767
+ jp_p += curr_p
768
+ jp_t += str(t + 6) * len(curr_p)
769
+ elif p in punctuation:
770
+ jp_p += p
771
+ jp_t += "0"
772
+ elif p == "▁":
773
+ jp_p += p
774
+ jp_t += " "
775
+ else:
776
+ print(p, t)
777
+ jp_p += "|"
778
+ jp_t += "0"
779
+ # return phones, tones, w_p_len
780
+ jp_p = jp_p.replace("▁", " ")
781
+ jp_t = jp_t.translate(self.table)
782
+ jp_l = ""
783
+ for t in jp_t:
784
+ if t == " ":
785
+ jp_l += " "
786
+ else:
787
+ jp_l += "2"
788
+ # print(jp_p)
789
+ # print(jp_t)
790
+ # print(jp_l)
791
+ # print(len(jp_p_len), sum(w_p_len), len(jp_p), sum(jp_p_len))
792
+ assert len(jp_p) == len(jp_t) and len(jp_p) == len(jp_l)
793
+
794
+ jp_item["jp_p"] = jp_p.replace("| |", "|").rstrip("|")
795
+ jp_item["jp_t"] = jp_t
796
+ jp_item["jp_l"] = jp_l
797
+ jp_item["jp_normed_text"] = " ".join(normed_text)
798
+ jp_item["jp_pron_text"] = " ".join(pron_text)
799
+ # jp_item['jp_ruoma'] = sep_phonemes
800
+ # print(len(normed_text), len(sep_phonemes))
801
+ # print(normed_text)
802
+ return jp_item
803
+
804
+
805
+ jpc = JapanesePhoneConverter()
806
+
807
+
808
+ def japanese_to_ipa(text, text_tokenizer):
809
+ # phonemes = text_tokenizer(text)
810
+ if type(text) == str:
811
+ return jpc.getSentencePhone(text)["jp_p"]
812
+ else:
813
+ result_ph = []
814
+ for t in text:
815
+ result_ph.append(jpc.getSentencePhone(t)["jp_p"])
816
+ return result_ph
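A minimal usage sketch for japanese.py: importing the module builds the module-level `jpc` converter, so it assumes pyopenjtalk and the module's other dependencies are installed; note that `japanese_to_ipa` ignores its `text_tokenizer` argument and always routes through `jpc.getSentencePhone`.

# Usage sketch (assumes pyopenjtalk and its dictionary are installed)
from diffrhythm.g2p.g2p.japanese import japanese_to_ipa

ipa = japanese_to_ipa("私はそう思う", text_tokenizer=None)  # text_tokenizer is unused for Japanese
print(ipa)  # phoneme string taken from jpc.getSentencePhone(...)["jp_p"]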
diffrhythm/g2p/g2p/korean.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import re
7
+
8
+ """
9
+ Text clean time
10
+ """
11
+ english_dictionary = {
12
+ "KOREA": "코리아",
13
+ "IDOL": "아이돌",
14
+ "IT": "아이티",
15
+ "IQ": "아이큐",
16
+ "UP": "업",
17
+ "DOWN": "다운",
18
+ "PC": "피씨",
19
+ "CCTV": "씨씨티비",
20
+ "SNS": "에스엔에스",
21
+ "AI": "에이아이",
22
+ "CEO": "씨이오",
23
+ "A": "에이",
24
+ "B": "비",
25
+ "C": "씨",
26
+ "D": "디",
27
+ "E": "이",
28
+ "F": "에프",
29
+ "G": "지",
30
+ "H": "에이치",
31
+ "I": "아이",
32
+ "J": "제이",
33
+ "K": "케이",
34
+ "L": "엘",
35
+ "M": "엠",
36
+ "N": "엔",
37
+ "O": "오",
38
+ "P": "피",
39
+ "Q": "큐",
40
+ "R": "알",
41
+ "S": "에스",
42
+ "T": "티",
43
+ "U": "유",
44
+ "V": "브이",
45
+ "W": "더블유",
46
+ "X": "엑스",
47
+ "Y": "와이",
48
+ "Z": "제트",
49
+ }
50
+
51
+
52
+ def normalize(text):
53
+ text = text.strip()
54
+ text = re.sub(
55
+ "[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]", "", text
56
+ )
57
+ text = normalize_english(text)
58
+ text = text.lower()
59
+ return text
60
+
61
+
62
+ def normalize_english(text):
63
+ def fn(m):
64
+ word = m.group()
65
+ if word in english_dictionary:
66
+ return english_dictionary.get(word)
67
+ return word
68
+
69
+ text = re.sub("([A-Za-z]+)", fn, text)
70
+ return text
71
+
72
+
73
+ def korean_to_ipa(text, text_tokenizer):
74
+ if type(text) == str:
75
+ text = normalize(text)
76
+ phonemes = text_tokenizer(text)
77
+ return phonemes
78
+ else:
79
+ for i, t in enumerate(text):
80
+ text[i] = normalize(t)
81
+ return text_tokenizer(text)
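A minimal usage sketch for korean.py: `korean_to_ipa` only normalizes the text (rewriting the English entries above to Hangul, then lowercasing) and delegates to whatever `text_tokenizer` is passed in. Pairing it with the espeak-backed TextTokenizer from text_tokenizers.py and the "ko" voice is an assumption, not something this upload pins down.

# Usage sketch (espeak-ng must be installed; the "ko" voice name is an assumption)
from diffrhythm.g2p.g2p.korean import korean_to_ipa
from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer

tokenizer = TextTokenizer(language="ko")      # espeak backend configured for Korean
print(korean_to_ipa("CCTV 뉴스", tokenizer))   # "CCTV" is rewritten to 씨씨티비 before phonemization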
diffrhythm/g2p/g2p/mandarin.py ADDED
@@ -0,0 +1,600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import re
7
+ import jieba
8
+ import cn2an
9
+ from pypinyin import lazy_pinyin, BOPOMOFO
10
+ from typing import List
11
+ from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict
12
+ from diffrhythm.g2p.utils.front_utils import *
13
+ import os
14
+ from huggingface_hub import hf_hub_download
15
+
16
+ # from g2pw import G2PWConverter
17
+
18
+
19
+ # set blank level, {0:"none",1:"char", 2:"word"}
20
+ BLANK_LEVEL = 0
21
+
22
+ # conv = G2PWConverter(style='pinyin', enable_non_tradional_chinese=True)
23
+ resource_path = r"./diffrhythm/g2p"
24
+ poly_all_class_path = os.path.join(
25
+ resource_path, "sources", "g2p_chinese_model", "polychar.txt"
26
+ )
27
+ if not os.path.exists(poly_all_class_path):
28
+ print(
29
+ "Incorrect path for polyphonic character class dictionary: {}, please check...".format(
30
+ poly_all_class_path
31
+ )
32
+ )
33
+ exit()
34
+ poly_dict = generate_poly_lexicon(poly_all_class_path)
35
+
36
+ # Set up G2PW model parameters
37
+ g2pw_poly_model_path = os.path.join(resource_path, "sources", "g2p_chinese_model")
38
+ if not os.path.exists(g2pw_poly_model_path):
39
+ print(
40
+ "Incorrect path for g2pw polyphonic character model: {}, please check...".format(
41
+ g2pw_poly_model_path
42
+ )
43
+ )
44
+ exit()
45
+
46
+ json_file_path = os.path.join(
47
+ resource_path, "sources", "g2p_chinese_model", "polydict.json"
48
+ )
49
+ if not os.path.exists(json_file_path):
50
+ print(
51
+ "Incorrect path for g2pw id to pinyin dictionary: {}, please check...".format(
52
+ json_file_path
53
+ )
54
+ )
55
+ exit()
56
+
57
+ jsonr_file_path = os.path.join(
58
+ resource_path, "sources", "g2p_chinese_model", "polydict_r.json"
59
+ )
60
+ if not os.path.exists(jsonr_file_path):
61
+ print(
62
+ "Incorrect path for g2pw pinyin to id dictionary: {}, please check...".format(
63
+ jsonr_file_path
64
+ )
65
+ )
66
+ exit()
67
+
68
+ g2pw_poly_predict = BertPolyPredict(
69
+ g2pw_poly_model_path, jsonr_file_path, json_file_path
70
+ )
71
+
72
+
73
+ """
74
+ Text clean time
75
+ """
76
+ # List of (Latin alphabet, bopomofo) pairs:
77
+ _latin_to_bopomofo = [
78
+ (re.compile("%s" % x[0], re.IGNORECASE), x[1])
79
+ for x in [
80
+ ("a", "ㄟˉ"),
81
+ ("b", "ㄅㄧˋ"),
82
+ ("c", "ㄙㄧˉ"),
83
+ ("d", "ㄉㄧˋ"),
84
+ ("e", "ㄧˋ"),
85
+ ("f", "ㄝˊㄈㄨˋ"),
86
+ ("g", "ㄐㄧˋ"),
87
+ ("h", "ㄝˇㄑㄩˋ"),
88
+ ("i", "ㄞˋ"),
89
+ ("j", "ㄐㄟˋ"),
90
+ ("k", "ㄎㄟˋ"),
91
+ ("l", "ㄝˊㄛˋ"),
92
+ ("m", "ㄝˊㄇㄨˋ"),
93
+ ("n", "ㄣˉ"),
94
+ ("o", "ㄡˉ"),
95
+ ("p", "ㄆㄧˉ"),
96
+ ("q", "ㄎㄧㄡˉ"),
97
+ ("r", "ㄚˋ"),
98
+ ("s", "ㄝˊㄙˋ"),
99
+ ("t", "ㄊㄧˋ"),
100
+ ("u", "ㄧㄡˉ"),
101
+ ("v", "ㄨㄧˉ"),
102
+ ("w", "ㄉㄚˋㄅㄨˋㄌㄧㄡˋ"),
103
+ ("x", "ㄝˉㄎㄨˋㄙˋ"),
104
+ ("y", "ㄨㄞˋ"),
105
+ ("z", "ㄗㄟˋ"),
106
+ ]
107
+ ]
108
+
109
+ # List of (bopomofo, ipa) pairs:
110
+ _bopomofo_to_ipa = [
111
+ (re.compile("%s" % x[0]), x[1])
112
+ for x in [
113
+ ("ㄅㄛ", "p⁼wo"),
114
+ ("ㄆㄛ", "pʰwo"),
115
+ ("ㄇㄛ", "mwo"),
116
+ ("ㄈㄛ", "fwo"),
117
+ ("ㄧㄢ", "|jɛn"),
118
+ ("ㄩㄢ", "|ɥæn"),
119
+ ("ㄧㄣ", "|in"),
120
+ ("ㄩㄣ", "|ɥn"),
121
+ ("ㄧㄥ", "|iŋ"),
122
+ ("ㄨㄥ", "|ʊŋ"),
123
+ ("ㄩㄥ", "|jʊŋ"),
124
+ # Add
125
+ ("ㄧㄚ", "|ia"),
126
+ ("ㄧㄝ", "|iɛ"),
127
+ ("ㄧㄠ", "|iɑʊ"),
128
+ ("ㄧㄡ", "|ioʊ"),
129
+ ("ㄧㄤ", "|iɑŋ"),
130
+ ("ㄨㄚ", "|ua"),
131
+ ("ㄨㄛ", "|uo"),
132
+ ("ㄨㄞ", "|uaɪ"),
133
+ ("ㄨㄟ", "|ueɪ"),
134
+ ("ㄨㄢ", "|uan"),
135
+ ("ㄨㄣ", "|uən"),
136
+ ("ㄨㄤ", "|uɑŋ"),
137
+ ("ㄩㄝ", "|ɥɛ"),
138
+ # End
139
+ ("ㄅ", "p⁼"),
140
+ ("ㄆ", "pʰ"),
141
+ ("ㄇ", "m"),
142
+ ("ㄈ", "f"),
143
+ ("ㄉ", "t⁼"),
144
+ ("ㄊ", "tʰ"),
145
+ ("ㄋ", "n"),
146
+ ("ㄌ", "l"),
147
+ ("ㄍ", "k⁼"),
148
+ ("ㄎ", "kʰ"),
149
+ ("ㄏ", "x"),
150
+ ("ㄐ", "tʃ⁼"),
151
+ ("ㄑ", "tʃʰ"),
152
+ ("ㄒ", "ʃ"),
153
+ ("ㄓ", "ts`⁼"),
154
+ ("ㄔ", "ts`ʰ"),
155
+ ("ㄕ", "s`"),
156
+ ("ㄖ", "ɹ`"),
157
+ ("ㄗ", "ts⁼"),
158
+ ("ㄘ", "tsʰ"),
159
+ ("ㄙ", "|s"),
160
+ ("ㄚ", "|a"),
161
+ ("ㄛ", "|o"),
162
+ ("ㄜ", "|ə"),
163
+ ("ㄝ", "|ɛ"),
164
+ ("ㄞ", "|aɪ"),
165
+ ("ㄟ", "|eɪ"),
166
+ ("ㄠ", "|ɑʊ"),
167
+ ("ㄡ", "|oʊ"),
168
+ ("ㄢ", "|an"),
169
+ ("ㄣ", "|ən"),
170
+ ("ㄤ", "|ɑŋ"),
171
+ ("ㄥ", "|əŋ"),
172
+ ("ㄦ", "əɹ"),
173
+ ("ㄧ", "|i"),
174
+ ("ㄨ", "|u"),
175
+ ("ㄩ", "|ɥ"),
176
+ ("ˉ", "→|"),
177
+ ("ˊ", "↑|"),
178
+ ("ˇ", "↓↑|"),
179
+ ("ˋ", "↓|"),
180
+ ("˙", "|"),
181
+ ]
182
+ ]
183
+ must_not_er_words = {"女儿", "老儿", "男儿", "少儿", "小儿"}
184
+
185
+
186
+ chinese_lexicon_path = hf_hub_download(
187
+ repo_id="ASLP-lab/DiffRhythm",
188
+ filename="diffrhythm/g2p/sources/chinese_lexicon.txt",
189
+ repo_type="space"
190
+ )
191
+ word_pinyin_dict = {}
192
+ with open(chinese_lexicon_path, "r", encoding="utf-8") as fread:
193
+ txt_list = fread.readlines()
194
+ for txt in txt_list:
195
+ word, pinyin = txt.strip().split("\t")
196
+ word_pinyin_dict[word] = pinyin
197
+ fread.close()
198
+
199
+ pinyin_2_bopomofo_dict = {}
200
+ with open(
201
+ r"./diffrhythm/g2p/sources/pinyin_2_bpmf.txt", "r", encoding="utf-8"
202
+ ) as fread:
203
+ txt_list = fread.readlines()
204
+ for txt in txt_list:
205
+ pinyin, bopomofo = txt.strip().split("\t")
206
+ pinyin_2_bopomofo_dict[pinyin] = bopomofo
207
+ fread.close()
208
+
209
+ tone_dict = {
210
+ "0": "˙",
211
+ "5": "˙",
212
+ "1": "",
213
+ "2": "ˊ",
214
+ "3": "ˇ",
215
+ "4": "ˋ",
216
+ }
217
+
218
+ bopomofos2pinyin_dict = {}
219
+ with open(
220
+ r"./diffrhythm/g2p/sources/bpmf_2_pinyin.txt", "r", encoding="utf-8"
221
+ ) as fread:
222
+ txt_list = fread.readlines()
223
+ for txt in txt_list:
224
+ v, k = txt.strip().split("\t")
225
+ bopomofos2pinyin_dict[k] = v
226
+ fread.close()
227
+
228
+
229
+ def bpmf_to_pinyin(text):
230
+ bopomofo_list = text.split("|")
231
+ pinyin_list = []
232
+ for info in bopomofo_list:
233
+ pinyin = ""
234
+ for c in info:
235
+ if c in bopomofos2pinyin_dict:
236
+ pinyin += bopomofos2pinyin_dict[c]
237
+ if len(pinyin) == 0:
238
+ continue
239
+ if pinyin[-1] not in "01234":
240
+ pinyin += "1"
241
+ if pinyin[:-1] == "ve":
242
+ pinyin = "y" + pinyin
243
+ if pinyin[:-1] == "sh":
244
+ pinyin = pinyin[:-1] + "i" + pinyin[-1]
245
+ if pinyin == "sh":
246
+ pinyin = pinyin[:-1] + "i"
247
+ if pinyin[:-1] == "s":
248
+ pinyin = "si" + pinyin[-1]
249
+ if pinyin[:-1] == "c":
250
+ pinyin = "ci" + pinyin[-1]
251
+ if pinyin[:-1] == "i":
252
+ pinyin = "yi" + pinyin[-1]
253
+ if pinyin[:-1] == "iou":
254
+ pinyin = "you" + pinyin[-1]
255
+ if pinyin[:-1] == "ien":
256
+ pinyin = "yin" + pinyin[-1]
257
+ if "iou" in pinyin and pinyin[-4:-1] == "iou":
258
+ pinyin = pinyin[:-4] + "iu" + pinyin[-1]
259
+ if "uei" in pinyin:
260
+ if pinyin[:-1] == "uei":
261
+ pinyin = "wei" + pinyin[-1]
262
+ elif pinyin[-4:-1] == "uei":
263
+ pinyin = pinyin[:-4] + "ui" + pinyin[-1]
264
+ if "uen" in pinyin and pinyin[-4:-1] == "uen":
265
+ if pinyin[:-1] == "uen":
266
+ pinyin = "wen" + pinyin[-1]
267
+ elif pinyin[-4:-1] == "uei":
268
+ pinyin = pinyin[:-4] + "un" + pinyin[-1]
269
+ if "van" in pinyin and pinyin[-4:-1] == "van":
270
+ if pinyin[:-1] == "van":
271
+ pinyin = "yuan" + pinyin[-1]
272
+ elif pinyin[-4:-1] == "van":
273
+ pinyin = pinyin[:-4] + "uan" + pinyin[-1]
274
+ if "ueng" in pinyin and pinyin[-5:-1] == "ueng":
275
+ pinyin = pinyin[:-5] + "ong" + pinyin[-1]
276
+ if pinyin[:-1] == "veng":
277
+ pinyin = "yong" + pinyin[-1]
278
+ if "veng" in pinyin and pinyin[-5:-1] == "veng":
279
+ pinyin = pinyin[:-5] + "iong" + pinyin[-1]
280
+ if pinyin[:-1] == "ieng":
281
+ pinyin = "ying" + pinyin[-1]
282
+ if pinyin[:-1] == "u":
283
+ pinyin = "wu" + pinyin[-1]
284
+ if pinyin[:-1] == "v":
285
+ pinyin = "yv" + pinyin[-1]
286
+ if pinyin[:-1] == "ing":
287
+ pinyin = "ying" + pinyin[-1]
288
+ if pinyin[:-1] == "z":
289
+ pinyin = "zi" + pinyin[-1]
290
+ if pinyin[:-1] == "zh":
291
+ pinyin = "zhi" + pinyin[-1]
292
+ if pinyin[0] == "u":
293
+ pinyin = "w" + pinyin[1:]
294
+ if pinyin[0] == "i":
295
+ pinyin = "y" + pinyin[1:]
296
+ pinyin = pinyin.replace("ien", "in")
297
+
298
+ pinyin_list.append(pinyin)
299
+ return " ".join(pinyin_list)
300
+
301
+
302
+ # Convert numbers to Chinese pronunciation
303
+ def number_to_chinese(text):
304
+ # numbers = re.findall(r'\d+(?:\.?\d+)?', text)
305
+ # for number in numbers:
306
+ # text = text.replace(number, cn2an.an2cn(number), 1)
307
+ text = cn2an.transform(text, "an2cn")
308
+ return text
309
+
310
+
311
+ def normalization(text):
312
+ text = text.replace(",", ",")
313
+ text = text.replace("。", ".")
314
+ text = text.replace("!", "!")
315
+ text = text.replace("?", "?")
316
+ text = text.replace(";", ";")
317
+ text = text.replace(":", ":")
318
+ text = text.replace("、", ",")
319
+ text = text.replace("‘", "'")
320
+ text = text.replace("’", "'")
321
+ text = text.replace("⋯", "…")
322
+ text = text.replace("···", "…")
323
+ text = text.replace("・・・", "…")
324
+ text = text.replace("...", "…")
325
+ text = re.sub(r"\s+", "", text)
326
+ text = re.sub(r"[^\u4e00-\u9fff\s_,\.\?!;:\'…]", "", text)
327
+ text = re.sub(r"\s*([,\.\?!;:\'…])\s*", r"\1", text)
328
+ return text
329
+
330
+
331
+ def change_tone(bopomofo: str, tone: str) -> str:
332
+ if bopomofo[-1] not in "˙ˊˇˋ":
333
+ bopomofo = bopomofo + tone
334
+ else:
335
+ bopomofo = bopomofo[:-1] + tone
336
+ return bopomofo
337
+
338
+
339
+ def er_sandhi(word: str, bopomofos: List[str]) -> List[str]:
340
+ if len(word) > 1 and word[-1] == "儿" and word not in must_not_er_words:
341
+ bopomofos[-1] = change_tone(bopomofos[-1], "˙")
342
+ return bopomofos
343
+
344
+
345
+ def bu_sandhi(word: str, bopomofos: List[str]) -> List[str]:
346
+ valid_char = set(word)
347
+ if len(valid_char) == 1 and "不" in valid_char:
348
+ pass
349
+ elif word in ["不字"]:
350
+ pass
351
+ elif len(word) == 3 and word[1] == "不" and bopomofos[1][:-1] == "ㄅㄨ":
352
+ bopomofos[1] = bopomofos[1][:-1] + "˙"
353
+ else:
354
+ for i, char in enumerate(word):
355
+ if (
356
+ i + 1 < len(bopomofos)
357
+ and char == "不"
358
+ and i + 1 < len(word)
359
+ and 0 < len(bopomofos[i + 1])
360
+ and bopomofos[i + 1][-1] == "ˋ"
361
+ ):
362
+ bopomofos[i] = bopomofos[i][:-1] + "ˊ"
363
+ return bopomofos
364
+
365
+
366
+ def yi_sandhi(word: str, bopomofos: List[str]) -> List[str]:
367
+ punc = ":,;。?!“”‘’':,;.?!()(){}【】[]-~`、 "
368
+ if word.find("一") != -1 and any(
369
+ [item.isnumeric() for item in word if item != "一"]
370
+ ):
371
+ for i in range(len(word)):
372
+ if (
373
+ i == 0
374
+ and word[0] == "一"
375
+ and len(word) > 1
376
+ and word[1]
377
+ not in [
378
+ "零",
379
+ "一",
380
+ "二",
381
+ "三",
382
+ "四",
383
+ "五",
384
+ "六",
385
+ "七",
386
+ "八",
387
+ "九",
388
+ "十",
389
+ ]
390
+ ):
391
+ if len(bopomofos[0]) > 0 and bopomofos[1][-1] in ["ˋ", "˙"]:
392
+ bopomofos[0] = change_tone(bopomofos[0], "ˊ")
393
+ else:
394
+ bopomofos[0] = change_tone(bopomofos[0], "ˋ")
395
+ elif word[i] == "一":
396
+ bopomofos[i] = change_tone(bopomofos[i], "")
397
+ return bopomofos
398
+ elif len(word) == 3 and word[1] == "一" and word[0] == word[-1]:
399
+ bopomofos[1] = change_tone(bopomofos[1], "˙")
400
+ elif word.startswith("第一"):
401
+ bopomofos[1] = change_tone(bopomofos[1], "")
402
+ elif word.startswith("一月") or word.startswith("一日") or word.startswith("一号"):
403
+ bopomofos[0] = change_tone(bopomofos[0], "")
404
+ else:
405
+ for i, char in enumerate(word):
406
+ if char == "一" and i + 1 < len(word):
407
+ if (
408
+ len(bopomofos) > i + 1
409
+ and len(bopomofos[i + 1]) > 0
410
+ and bopomofos[i + 1][-1] in {"ˋ"}
411
+ ):
412
+ bopomofos[i] = change_tone(bopomofos[i], "ˊ")
413
+ else:
414
+ if word[i + 1] not in punc:
415
+ bopomofos[i] = change_tone(bopomofos[i], "ˋ")
416
+ else:
417
+ pass
418
+ return bopomofos
419
+
420
+
421
+ def merge_bu(seg: List) -> List:
422
+ new_seg = []
423
+ last_word = ""
424
+ for word in seg:
425
+ if word != "不":
426
+ if last_word == "不":
427
+ word = last_word + word
428
+ new_seg.append(word)
429
+ last_word = word
430
+ return new_seg
431
+
432
+
433
+ def merge_er(seg: List) -> List:
434
+ new_seg = []
435
+ for i, word in enumerate(seg):
436
+ if i - 1 >= 0 and word == "儿":
437
+ new_seg[-1] = new_seg[-1] + seg[i]
438
+ else:
439
+ new_seg.append(word)
440
+ return new_seg
441
+
442
+
443
+ def merge_yi(seg: List) -> List:
444
+ new_seg = []
445
+ # function 1
446
+ for i, word in enumerate(seg):
447
+ if (
448
+ i - 1 >= 0
449
+ and word == "一"
450
+ and i + 1 < len(seg)
451
+ and seg[i - 1] == seg[i + 1]
452
+ ):
453
+ if i - 1 < len(new_seg):
454
+ new_seg[i - 1] = new_seg[i - 1] + "一" + new_seg[i - 1]
455
+ else:
456
+ new_seg.append(word)
457
+ new_seg.append(seg[i + 1])
458
+ else:
459
+ if i - 2 >= 0 and seg[i - 1] == "一" and seg[i - 2] == word:
460
+ continue
461
+ else:
462
+ new_seg.append(word)
463
+ seg = new_seg
464
+ new_seg = []
465
+ isnumeric_flag = False
466
+ for i, word in enumerate(seg):
467
+ if all([item.isnumeric() for item in word]) and not isnumeric_flag:
468
+ isnumeric_flag = True
469
+ new_seg.append(word)
470
+ else:
471
+ new_seg.append(word)
472
+ seg = new_seg
473
+ new_seg = []
474
+ # function 2
475
+ for i, word in enumerate(seg):
476
+ if new_seg and new_seg[-1] == "一":
477
+ new_seg[-1] = new_seg[-1] + word
478
+ else:
479
+ new_seg.append(word)
480
+ return new_seg
481
+
482
+
483
+ # Word segmentation, then convert each word's pronunciation to bopomofo
484
+ def chinese_to_bopomofo(text_short, sentence):
485
+ # bopomofos = conv(text_short)
486
+ words = jieba.lcut(text_short, cut_all=False)
487
+ words = merge_yi(words)
488
+ words = merge_bu(words)
489
+ words = merge_er(words)
490
+ text = ""
491
+
492
+ char_index = 0
493
+ for word in words:
494
+ bopomofos = []
495
+ if word in word_pinyin_dict and word not in poly_dict:
496
+ pinyin = word_pinyin_dict[word]
497
+ for py in pinyin.split(" "):
498
+ if py[:-1] in pinyin_2_bopomofo_dict and py[-1] in tone_dict:
499
+ bopomofos.append(
500
+ pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
501
+ )
502
+ if BLANK_LEVEL == 1:
503
+ bopomofos.append("_")
504
+ else:
505
+ bopomofos_lazy = lazy_pinyin(word, BOPOMOFO)
506
+ bopomofos += bopomofos_lazy
507
+ if BLANK_LEVEL == 1:
508
+ bopomofos.append("_")
509
+ else:
510
+ for i in range(len(word)):
511
+ c = word[i]
512
+ if c in poly_dict:
513
+ poly_pinyin = g2pw_poly_predict.predict_process(
514
+ [text_short, char_index + i]
515
+ )[0]
516
+ py = poly_pinyin[2:-1]
517
+ bopomofos.append(
518
+ pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
519
+ )
520
+ if BLANK_LEVEL == 1:
521
+ bopomofos.append("_")
522
+ elif c in word_pinyin_dict:
523
+ py = word_pinyin_dict[c]
524
+ bopomofos.append(
525
+ pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
526
+ )
527
+ if BLANK_LEVEL == 1:
528
+ bopomofos.append("_")
529
+ else:
530
+ bopomofos.append(c)
531
+ if BLANK_LEVEL == 1:
532
+ bopomofos.append("_")
533
+ if BLANK_LEVEL == 2:
534
+ bopomofos.append("_")
535
+ char_index += len(word)
536
+
537
+ if (
538
+ len(word) == 3
539
+ and bopomofos[0][-1] == "ˇ"
540
+ and bopomofos[1][-1] == "ˇ"
541
+ and bopomofos[-1][-1] == "ˇ"
542
+ ):
543
+ bopomofos[0] = bopomofos[0] + "ˊ"
544
+ bopomofos[1] = bopomofos[1] + "ˊ"
545
+ if len(word) == 2 and bopomofos[0][-1] == "ˇ" and bopomofos[-1][-1] == "ˇ":
546
+ bopomofos[0] = bopomofos[0][:-1] + "ˊ"
547
+ bopomofos = bu_sandhi(word, bopomofos)
548
+ bopomofos = yi_sandhi(word, bopomofos)
549
+ bopomofos = er_sandhi(word, bopomofos)
550
+ if not re.search("[\u4e00-\u9fff]", word):
551
+ text += "|" + word
552
+ continue
553
+ for i in range(len(bopomofos)):
554
+ bopomofos[i] = re.sub(r"([\u3105-\u3129])$", r"\1ˉ", bopomofos[i])
555
+ if text != "":
556
+ text += "|"
557
+ text += "|".join(bopomofos)
558
+ return text
559
+
560
+
561
+ # Convert Latin letters to bopomofo
562
+ def latin_to_bopomofo(text):
563
+ for regex, replacement in _latin_to_bopomofo:
564
+ text = re.sub(regex, replacement, text)
565
+ return text
566
+
567
+
568
+ # Convert pinyin (bopomofo) to IPA
569
+ def bopomofo_to_ipa(text):
570
+ for regex, replacement in _bopomofo_to_ipa:
571
+ text = re.sub(regex, replacement, text)
572
+ return text
573
+
574
+
575
+ def _chinese_to_ipa(text, sentence):
576
+ text = number_to_chinese(text.strip())
577
+ text = normalization(text)
578
+ text = chinese_to_bopomofo(text, sentence)
579
+ # pinyin = bpmf_to_pinyin(text)
580
+ text = latin_to_bopomofo(text)
581
+ text = bopomofo_to_ipa(text)
582
+ text = re.sub("([sɹ]`[⁼ʰ]?)([→↓↑ ]+|$)", r"\1ɹ\2", text)
583
+ text = re.sub("([s][⁼ʰ]?)([→↓↑ ]+|$)", r"\1ɹ\2", text)
584
+ text = re.sub(r"^\||[^\w\s_,\.\?!;:\'…\|→↓↑⁼ʰ`]", "", text)
585
+ text = re.sub(r"([,\.\?!;:\'…])", r"|\1|", text)
586
+ text = re.sub(r"\|+", "|", text)
587
+ text = text.rstrip("|")
588
+ return text
589
+
590
+
591
+ # Convert Chinese to IPA
592
+ def chinese_to_ipa(text, sentence, text_tokenizer):
593
+ # phonemes = text_tokenizer(text.strip())
594
+ if type(text) == str:
595
+ return _chinese_to_ipa(text, sentence)
596
+ else:
597
+ result_ph = []
598
+ for t in text:
599
+ result_ph.append(_chinese_to_ipa(t, sentence))
600
+ return result_ph
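A minimal usage sketch for mandarin.py: importing the module already downloads the Chinese lexicon from the Hub and loads the ONNX polyphone model, so it assumes the relative paths above resolve (i.e. it is run from the repo root) and that the files are available. `chinese_to_ipa` ignores its `text_tokenizer` argument and runs the number_to_chinese → normalization → chinese_to_bopomofo → latin_to_bopomofo → bopomofo_to_ipa pipeline.

# Usage sketch (run from the repo root; the import itself loads lexica and the polyphone model)
from diffrhythm.g2p.g2p.mandarin import chinese_to_ipa

sent = "今年是2024年"
ipa = chinese_to_ipa(sent, sent, text_tokenizer=None)  # text_tokenizer is unused here
print(ipa)  # "|"-separated IPA syllables with tones rendered as →/↑/↓↑/↓ arrows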
diffrhythm/g2p/g2p/text_tokenizers.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import re
7
+ import os
8
+ from typing import List, Pattern, Union
9
+ from phonemizer.utils import list2str, str2list
10
+ from phonemizer.backend import EspeakBackend
11
+ from phonemizer.backend.espeak.language_switch import LanguageSwitch
12
+ from phonemizer.backend.espeak.words_mismatch import WordMismatch
13
+ from phonemizer.punctuation import Punctuation
14
+ # from zer.separator import Separator
15
+ from phonemizer.separator import Separator
16
+
17
+
18
+ class TextTokenizer:
19
+ """Phonemize Text."""
20
+
21
+ def __init__(
22
+ self,
23
+ language="en-us",
24
+ backend="espeak",
25
+ separator=Separator(word="|_|", syllable="-", phone="|"),
26
+ preserve_punctuation=True,
27
+ with_stress: bool = False,
28
+ tie: Union[bool, str] = False,
29
+ language_switch: LanguageSwitch = "remove-flags",
30
+ words_mismatch: WordMismatch = "ignore",
31
+ ) -> None:
32
+ self.preserve_punctuation_marks = ",.?!;:'…"
33
+ self.backend = EspeakBackend(
34
+ language,
35
+ punctuation_marks=self.preserve_punctuation_marks,
36
+ preserve_punctuation=preserve_punctuation,
37
+ with_stress=with_stress,
38
+ tie=tie,
39
+ language_switch=language_switch,
40
+ words_mismatch=words_mismatch,
41
+ )
42
+
43
+ self.separator = separator
44
+
45
+ # convert chinese punctuation to english punctuation
46
+ def convert_chinese_punctuation(self, text: str) -> str:
47
+ text = text.replace(",", ",")
48
+ text = text.replace("。", ".")
49
+ text = text.replace("!", "!")
50
+ text = text.replace("?", "?")
51
+ text = text.replace(";", ";")
52
+ text = text.replace(":", ":")
53
+ text = text.replace("、", ",")
54
+ text = text.replace("‘", "'")
55
+ text = text.replace("’", "'")
56
+ text = text.replace("⋯", "…")
57
+ text = text.replace("···", "…")
58
+ text = text.replace("・・・", "…")
59
+ text = text.replace("...", "…")
60
+ return text
61
+
62
+ def __call__(self, text, strip=True) -> List[str]:
63
+
64
+ text_type = type(text)
65
+ normalized_text = []
66
+ for line in str2list(text):
67
+ line = self.convert_chinese_punctuation(line.strip())
68
+ line = re.sub(r"[^\w\s_,\.\?!;:\'…]", "", line)
69
+ line = re.sub(r"\s*([,\.\?!;:\'…])\s*", r"\1", line)
70
+ line = re.sub(r"\s+", " ", line)
71
+ normalized_text.append(line)
72
+ # print("Normalized test: ", normalized_text[0])
73
+ phonemized = self.backend.phonemize(
74
+ normalized_text, separator=self.separator, strip=strip, njobs=1
75
+ )
76
+ if text_type == str:
77
+ phonemized = re.sub(r"([,\.\?!;:\'…])", r"|\1|", list2str(phonemized))
78
+ phonemized = re.sub(r"\|+", "|", phonemized)
79
+ phonemized = phonemized.rstrip("|")
80
+ else:
81
+ for i in range(len(phonemized)):
82
+ phonemized[i] = re.sub(r"([,\.\?!;:\'…])", r"|\1|", phonemized[i])
83
+ phonemized[i] = re.sub(r"\|+", "|", phonemized[i])
84
+ phonemized[i] = phonemized[i].rstrip("|")
85
+ return phonemized
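A minimal usage sketch for TextTokenizer: it wraps phonemizer's EspeakBackend, so espeak-ng must be available; `__call__` accepts either a single string or a list of strings and returns phonemes joined with the configured separators, with punctuation preserved.

# Usage sketch (requires phonemizer with the espeak-ng backend installed)
from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer

tk = TextTokenizer(language="en-us")
print(tk("Hello, world!"))                 # single string -> phoneme string with "|" phone separators
print(tk(["first line", "second line"]))   # list input -> list of phoneme strings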
diffrhythm/g2p/g2p/vocab.json ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab": {
3
+ ",": 0,
4
+ ".": 1,
5
+ "?": 2,
6
+ "!": 3,
7
+ "_": 4,
8
+ "iː": 5,
9
+ "ɪ": 6,
10
+ "ɜː": 7,
11
+ "ɚ": 8,
12
+ "oːɹ": 9,
13
+ "ɔː": 10,
14
+ "ɔːɹ": 11,
15
+ "ɑː": 12,
16
+ "uː": 13,
17
+ "ʊ": 14,
18
+ "ɑːɹ": 15,
19
+ "ʌ": 16,
20
+ "ɛ": 17,
21
+ "æ": 18,
22
+ "eɪ": 19,
23
+ "aɪ": 20,
24
+ "ɔɪ": 21,
25
+ "aʊ": 22,
26
+ "oʊ": 23,
27
+ "ɪɹ": 24,
28
+ "ɛɹ": 25,
29
+ "ʊɹ": 26,
30
+ "p": 27,
31
+ "b": 28,
32
+ "t": 29,
33
+ "d": 30,
34
+ "k": 31,
35
+ "ɡ": 32,
36
+ "f": 33,
37
+ "v": 34,
38
+ "θ": 35,
39
+ "ð": 36,
40
+ "s": 37,
41
+ "z": 38,
42
+ "ʃ": 39,
43
+ "ʒ": 40,
44
+ "h": 41,
45
+ "tʃ": 42,
46
+ "dʒ": 43,
47
+ "m": 44,
48
+ "n": 45,
49
+ "ŋ": 46,
50
+ "j": 47,
51
+ "w": 48,
52
+ "ɹ": 49,
53
+ "l": 50,
54
+ "tɹ": 51,
55
+ "dɹ": 52,
56
+ "ts": 53,
57
+ "dz": 54,
58
+ "i": 55,
59
+ "ɔ": 56,
60
+ "ə": 57,
61
+ "ɾ": 58,
62
+ "iə": 59,
63
+ "r": 60,
64
+ "u": 61,
65
+ "oː": 62,
66
+ "ɛː": 63,
67
+ "ɪː": 64,
68
+ "aɪə": 65,
69
+ "aɪɚ": 66,
70
+ "ɑ̃": 67,
71
+ "ç": 68,
72
+ "ɔ̃": 69,
73
+ "ææ": 70,
74
+ "ɐɐ": 71,
75
+ "ɡʲ": 72,
76
+ "nʲ": 73,
77
+ "iːː": 74,
78
+
79
+ "p⁼": 75,
80
+ "pʰ": 76,
81
+ "t⁼": 77,
82
+ "tʰ": 78,
83
+ "k⁼": 79,
84
+ "kʰ": 80,
85
+ "x": 81,
86
+ "tʃ⁼": 82,
87
+ "tʃʰ": 83,
88
+ "ts`⁼": 84,
89
+ "ts`ʰ": 85,
90
+ "s`": 86,
91
+ "ɹ`": 87,
92
+ "ts⁼": 88,
93
+ "tsʰ": 89,
94
+ "p⁼wo": 90,
95
+ "p⁼wo→": 91,
96
+ "p⁼wo↑": 92,
97
+ "p⁼wo↓↑": 93,
98
+ "p⁼wo↓": 94,
99
+ "pʰwo": 95,
100
+ "pʰwo→": 96,
101
+ "pʰwo↑": 97,
102
+ "pʰwo↓↑": 98,
103
+ "pʰwo↓": 99,
104
+ "mwo": 100,
105
+ "mwo→": 101,
106
+ "mwo↑": 102,
107
+ "mwo↓↑": 103,
108
+ "mwo↓": 104,
109
+ "fwo": 105,
110
+ "fwo→": 106,
111
+ "fwo↑": 107,
112
+ "fwo↓↑": 108,
113
+ "fwo↓": 109,
114
+ "jɛn": 110,
115
+ "jɛn→": 111,
116
+ "jɛn↑": 112,
117
+ "jɛn↓↑": 113,
118
+ "jɛn↓": 114,
119
+ "ɥæn": 115,
120
+ "ɥæn→": 116,
121
+ "ɥæn↑": 117,
122
+ "ɥæn↓↑": 118,
123
+ "ɥæn↓": 119,
124
+ "in": 120,
125
+ "in→": 121,
126
+ "in↑": 122,
127
+ "in↓↑": 123,
128
+ "in↓": 124,
129
+ "ɥn": 125,
130
+ "ɥn→": 126,
131
+ "ɥn↑": 127,
132
+ "ɥn↓↑": 128,
133
+ "ɥn↓": 129,
134
+ "iŋ": 130,
135
+ "iŋ→": 131,
136
+ "iŋ↑": 132,
137
+ "iŋ↓↑": 133,
138
+ "iŋ↓": 134,
139
+ "ʊŋ": 135,
140
+ "ʊŋ→": 136,
141
+ "ʊŋ↑": 137,
142
+ "ʊŋ↓↑": 138,
143
+ "ʊŋ↓": 139,
144
+ "jʊŋ": 140,
145
+ "jʊŋ→": 141,
146
+ "jʊŋ↑": 142,
147
+ "jʊŋ↓↑": 143,
148
+ "jʊŋ↓": 144,
149
+ "ia": 145,
150
+ "ia→": 146,
151
+ "ia↑": 147,
152
+ "ia↓↑": 148,
153
+ "ia↓": 149,
154
+ "iɛ": 150,
155
+ "iɛ→": 151,
156
+ "iɛ↑": 152,
157
+ "iɛ↓↑": 153,
158
+ "iɛ↓": 154,
159
+ "iɑʊ": 155,
160
+ "iɑʊ→": 156,
161
+ "iɑʊ↑": 157,
162
+ "iɑʊ↓↑": 158,
163
+ "iɑʊ↓": 159,
164
+ "ioʊ": 160,
165
+ "ioʊ→": 161,
166
+ "ioʊ↑": 162,
167
+ "ioʊ↓↑": 163,
168
+ "ioʊ↓": 164,
169
+ "iɑŋ": 165,
170
+ "iɑŋ→": 166,
171
+ "iɑŋ↑": 167,
172
+ "iɑŋ↓↑": 168,
173
+ "iɑŋ↓": 169,
174
+ "ua": 170,
175
+ "ua→": 171,
176
+ "ua↑": 172,
177
+ "ua↓↑": 173,
178
+ "ua↓": 174,
179
+ "uo": 175,
180
+ "uo→": 176,
181
+ "uo↑": 177,
182
+ "uo↓↑": 178,
183
+ "uo↓": 179,
184
+ "uaɪ": 180,
185
+ "uaɪ→": 181,
186
+ "uaɪ↑": 182,
187
+ "uaɪ↓↑": 183,
188
+ "uaɪ↓": 184,
189
+ "ueɪ": 185,
190
+ "ueɪ→": 186,
191
+ "ueɪ↑": 187,
192
+ "ueɪ↓↑": 188,
193
+ "ueɪ↓": 189,
194
+ "uan": 190,
195
+ "uan→": 191,
196
+ "uan↑": 192,
197
+ "uan↓↑": 193,
198
+ "uan↓": 194,
199
+ "uən": 195,
200
+ "uən→": 196,
201
+ "uən↑": 197,
202
+ "uən↓↑": 198,
203
+ "uən↓": 199,
204
+ "uɑŋ": 200,
205
+ "uɑŋ→": 201,
206
+ "uɑŋ↑": 202,
207
+ "uɑŋ↓↑": 203,
208
+ "uɑŋ↓": 204,
209
+ "ɥɛ": 205,
210
+ "ɥɛ→": 206,
211
+ "ɥɛ↑": 207,
212
+ "ɥɛ↓↑": 208,
213
+ "ɥɛ↓": 209,
214
+ "a": 210,
215
+ "a→": 211,
216
+ "a↑": 212,
217
+ "a↓↑": 213,
218
+ "a↓": 214,
219
+ "o": 215,
220
+ "o→": 216,
221
+ "o↑": 217,
222
+ "o↓↑": 218,
223
+ "o↓": 219,
224
+ "ə→": 220,
225
+ "ə↑": 221,
226
+ "ə↓↑": 222,
227
+ "ə↓": 223,
228
+ "ɛ→": 224,
229
+ "ɛ↑": 225,
230
+ "ɛ↓↑": 226,
231
+ "ɛ↓": 227,
232
+ "aɪ→": 228,
233
+ "aɪ↑": 229,
234
+ "aɪ↓↑": 230,
235
+ "aɪ↓": 231,
236
+ "eɪ→": 232,
237
+ "eɪ↑": 233,
238
+ "eɪ↓↑": 234,
239
+ "eɪ↓": 235,
240
+ "ɑʊ": 236,
241
+ "ɑʊ→": 237,
242
+ "ɑʊ↑": 238,
243
+ "ɑʊ↓↑": 239,
244
+ "ɑʊ↓": 240,
245
+ "oʊ→": 241,
246
+ "oʊ↑": 242,
247
+ "oʊ↓↑": 243,
248
+ "oʊ↓": 244,
249
+ "an": 245,
250
+ "an→": 246,
251
+ "an↑": 247,
252
+ "an↓↑": 248,
253
+ "an↓": 249,
254
+ "ən": 250,
255
+ "ən→": 251,
256
+ "ən↑": 252,
257
+ "ən↓↑": 253,
258
+ "ən↓": 254,
259
+ "ɑŋ": 255,
260
+ "ɑŋ→": 256,
261
+ "ɑŋ↑": 257,
262
+ "ɑŋ↓↑": 258,
263
+ "ɑŋ↓": 259,
264
+ "əŋ": 260,
265
+ "əŋ→": 261,
266
+ "əŋ↑": 262,
267
+ "əŋ↓↑": 263,
268
+ "əŋ↓": 264,
269
+ "əɹ": 265,
270
+ "əɹ→": 266,
271
+ "əɹ↑": 267,
272
+ "əɹ↓↑": 268,
273
+ "əɹ↓": 269,
274
+ "i→": 270,
275
+ "i↑": 271,
276
+ "i↓↑": 272,
277
+ "i↓": 273,
278
+ "u→": 274,
279
+ "u↑": 275,
280
+ "u↓↑": 276,
281
+ "u↓": 277,
282
+ "ɥ": 278,
283
+ "ɥ→": 279,
284
+ "ɥ↑": 280,
285
+ "ɥ↓↑": 281,
286
+ "ɥ↓": 282,
287
+ "ts`⁼ɹ": 283,
288
+ "ts`⁼ɹ→": 284,
289
+ "ts`⁼ɹ↑": 285,
290
+ "ts`⁼ɹ↓↑": 286,
291
+ "ts`⁼ɹ↓": 287,
292
+ "ts`ʰɹ": 288,
293
+ "ts`ʰɹ→": 289,
294
+ "ts`ʰɹ↑": 290,
295
+ "ts`ʰɹ↓↑": 291,
296
+ "ts`ʰɹ↓": 292,
297
+ "s`ɹ": 293,
298
+ "s`ɹ→": 294,
299
+ "s`ɹ↑": 295,
300
+ "s`ɹ↓↑": 296,
301
+ "s`ɹ���": 297,
302
+ "ɹ`ɹ": 298,
303
+ "ɹ`ɹ→": 299,
304
+ "ɹ`ɹ↑": 300,
305
+ "ɹ`ɹ↓↑": 301,
306
+ "ɹ`ɹ↓": 302,
307
+ "ts⁼ɹ": 303,
308
+ "ts⁼ɹ→": 304,
309
+ "ts⁼ɹ↑": 305,
310
+ "ts⁼ɹ↓↑": 306,
311
+ "ts⁼ɹ↓": 307,
312
+ "tsʰɹ": 308,
313
+ "tsʰɹ→": 309,
314
+ "tsʰɹ↑": 310,
315
+ "tsʰɹ↓↑": 311,
316
+ "tsʰɹ↓": 312,
317
+ "sɹ": 313,
318
+ "sɹ→": 314,
319
+ "sɹ↑": 315,
320
+ "sɹ↓↑": 316,
321
+ "sɹ↓": 317,
322
+
323
+ "ɯ": 318,
324
+ "e": 319,
325
+ "aː": 320,
326
+ "ɯː": 321,
327
+ "eː": 322,
328
+ "ç": 323,
329
+ "ɸ": 324,
330
+ "ɰᵝ": 325,
331
+ "ɴ": 326,
332
+ "g": 327,
333
+ "dʑ": 328,
334
+ "q": 329,
335
+ "ː": 330,
336
+ "bj": 331,
337
+ "tɕ": 332,
338
+ "dej": 333,
339
+ "tej": 334,
340
+ "gj": 335,
341
+ "gɯ": 336,
342
+ "çj": 337,
343
+ "kj": 338,
344
+ "kɯ": 339,
345
+ "mj": 340,
346
+ "nj": 341,
347
+ "pj": 342,
348
+ "ɾj": 343,
349
+ "ɕ": 344,
350
+ "tsɯ": 345,
351
+
352
+ "ɐ": 346,
353
+ "ɑ": 347,
354
+ "ɒ": 348,
355
+ "ɜ": 349,
356
+ "ɫ": 350,
357
+ "ʑ": 351,
358
+ "ʲ": 352,
359
+
360
+ "y": 353,
361
+ "ø": 354,
362
+ "œ": 355,
363
+ "ʁ": 356,
364
+ "̃": 357,
365
+ "ɲ": 358,
366
+
367
+ ":": 359,
368
+ ";": 360,
369
+ "'": 361,
370
+ "…": 362
371
+ }
372
+ }
diffrhythm/g2p/g2p_generation.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import os
7
+ import sys
8
+
9
+ from diffrhythm.g2p.g2p import PhonemeBpeTokenizer
10
+ from diffrhythm.g2p.utils.g2p import phonemizer_g2p
11
+ import tqdm
12
+ from typing import List
13
+ import json
14
+ import os
15
+ import re
16
+
17
+
18
+ def ph_g2p(text, language):
19
+
20
+ return phonemizer_g2p(text=text, language=language)
21
+
22
+
23
+ def g2p(text, sentence, language):
24
+
25
+ return text_tokenizer.tokenize(text=text, sentence=sentence, language=language)
26
+
27
+
28
+ def is_chinese(char):
29
+ if char >= "\u4e00" and char <= "\u9fa5":
30
+ return True
31
+ else:
32
+ return False
33
+
34
+
35
+ def is_alphabet(char):
36
+ if (char >= "\u0041" and char <= "\u005a") or (
37
+ char >= "\u0061" and char <= "\u007a"
38
+ ):
39
+ return True
40
+ else:
41
+ return False
42
+
43
+
44
+ def is_other(char):
45
+ if not (is_chinese(char) or is_alphabet(char)):
46
+ return True
47
+ else:
48
+ return False
49
+
50
+
51
+ def get_segment(text: str) -> List[tuple]:
52
+ # sentence --> [ch_part, en_part, ch_part, ...]
53
+ segments = []
54
+ types = []
55
+ flag = 0
56
+ temp_seg = ""
57
+ temp_lang = ""
58
+
59
+ # Determine the type of each character: "zh" (Chinese), "en" (Latin alphabet), or "other".
60
+ for i, ch in enumerate(text):
61
+ if is_chinese(ch):
62
+ types.append("zh")
63
+ elif is_alphabet(ch):
64
+ types.append("en")
65
+ else:
66
+ types.append("other")
67
+
68
+ assert len(types) == len(text)
69
+
70
+ for i in range(len(types)):
71
+ # find the first char of the seg
72
+ if flag == 0:
73
+ temp_seg += text[i]
74
+ temp_lang = types[i]
75
+ flag = 1
76
+ else:
77
+ if temp_lang == "other":
78
+ if types[i] == temp_lang:
79
+ temp_seg += text[i]
80
+ else:
81
+ temp_seg += text[i]
82
+ temp_lang = types[i]
83
+ else:
84
+ if types[i] == temp_lang:
85
+ temp_seg += text[i]
86
+ elif types[i] == "other":
87
+ temp_seg += text[i]
88
+ else:
89
+ segments.append((temp_seg, temp_lang))
90
+ temp_seg = text[i]
91
+ temp_lang = types[i]
92
+ flag = 1
93
+
94
+ segments.append((temp_seg, temp_lang))
95
+ return segments
96
+
97
+
98
+ def chn_eng_g2p(text: str):
99
+ # now only en and ch
100
+ segments = get_segment(text)
101
+ all_phoneme = ""
102
+ all_tokens = []
103
+
104
+ for index in range(len(segments)):
105
+ seg = segments[index]
106
+ phoneme, token = g2p(seg[0], text, seg[1])
107
+ all_phoneme += phoneme + "|"
108
+ all_tokens += token
109
+
110
+ if seg[1] == "en" and index == len(segments) - 1 and all_phoneme[-2] == "_":
111
+ all_phoneme = all_phoneme[:-2]
112
+ all_tokens = all_tokens[:-1]
113
+ return all_phoneme, all_tokens
114
+
115
+
116
+ text_tokenizer = PhonemeBpeTokenizer()
117
+ with open("./diffrhythm/g2p/g2p/vocab.json", "r") as f:
118
+ json_data = f.read()
119
+ data = json.loads(json_data)
120
+ vocab = data["vocab"]
121
+
122
+ if __name__ == '__main__':
123
+ phone, token = chn_eng_g2p("你好,hello world")
124
+ phone, token = chn_eng_g2p("你好,hello world, Bonjour, 테스트 해 보겠습니다, 五月雨緑")
125
+ print(phone)
126
+ print(token)
127
+
128
+ #phone, token = text_tokenizer.tokenize("你好,hello world, Bonjour, 테스트 해 보겠습니다, 五月雨緑", "", "auto")
129
+ phone, token = text_tokenizer.tokenize("緑", "", "auto")
130
+ #phone, token = text_tokenizer.tokenize("आइए इसका परीक्षण करें", "", "auto")
131
+ #phone, token = text_tokenizer.tokenize("आइए इसका परीक्षण करें", "", "other")
132
+ print(phone)
133
+ print(token)
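A small sketch of the segmentation step that chn_eng_g2p builds on, assuming the module-level setup above (tokenizer and vocab.json) can run from the repo root: get_segment splits a mixed string into (substring, lang) chunks, with lang one of "zh", "en", or "other", and "other" characters attached to the current chunk.

# Illustration of the segmentation used by chn_eng_g2p
from diffrhythm.g2p.g2p_generation import get_segment

segs = get_segment("你好hello world")
# segs is a list of (substring, lang) tuples; Chinese runs and Latin runs
# land in separate tuples, while punctuation/spaces stick to the current run
print(segs)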
diffrhythm/g2p/sources/bpmf_2_pinyin.txt ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ b ㄅ
2
+ p ㄆ
3
+ m ㄇ
4
+ f ㄈ
5
+ d ㄉ
6
+ t ㄊ
7
+ n ㄋ
8
+ l ㄌ
9
+ g ㄍ
10
+ k ㄎ
11
+ h ㄏ
12
+ j ㄐ
13
+ q ㄑ
14
+ x ㄒ
15
+ zh ㄓ
16
+ ch ㄔ
17
+ sh ㄕ
18
+ r ㄖ
19
+ z ㄗ
20
+ c ㄘ
21
+ s ㄙ
22
+ i ㄧ
23
+ u ㄨ
24
+ v ㄩ
25
+ a ㄚ
26
+ o ㄛ
27
+ e ㄜ
28
+ e ㄝ
29
+ ai ㄞ
30
+ ei ㄟ
31
+ ao ㄠ
32
+ ou ㄡ
33
+ an ㄢ
34
+ en ㄣ
35
+ ang ㄤ
36
+ eng ㄥ
37
+ er ㄦ
38
+ 2 ˊ
39
+ 3 ˇ
40
+ 4 ˋ
41
+ 0 ˙
diffrhythm/g2p/sources/chinese_lexicon.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3a7685d1c3e68eb2fa304bfc63e90c90c3c1a1948839a5b1b507b2131b3e2fb
3
+ size 14779443
diffrhythm/g2p/sources/g2p_chinese_model/config.json ADDED
@@ -0,0 +1,819 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/BERT-POLY-v2/pretrained_models/mini_bert",
3
+ "architectures": [
4
+ "BertPoly"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 384,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3",
18
+ "4": "LABEL_4",
19
+ "5": "LABEL_5",
20
+ "6": "LABEL_6",
21
+ "7": "LABEL_7",
22
+ "8": "LABEL_8",
23
+ "9": "LABEL_9",
24
+ "10": "LABEL_10",
25
+ "11": "LABEL_11",
26
+ "12": "LABEL_12",
27
+ "13": "LABEL_13",
28
+ "14": "LABEL_14",
29
+ "15": "LABEL_15",
30
+ "16": "LABEL_16",
31
+ "17": "LABEL_17",
32
+ "18": "LABEL_18",
33
+ "19": "LABEL_19",
34
+ "20": "LABEL_20",
35
+ "21": "LABEL_21",
36
+ "22": "LABEL_22",
37
+ "23": "LABEL_23",
38
+ "24": "LABEL_24",
39
+ "25": "LABEL_25",
40
+ "26": "LABEL_26",
41
+ "27": "LABEL_27",
42
+ "28": "LABEL_28",
43
+ "29": "LABEL_29",
44
+ "30": "LABEL_30",
45
+ "31": "LABEL_31",
46
+ "32": "LABEL_32",
47
+ "33": "LABEL_33",
48
+ "34": "LABEL_34",
49
+ "35": "LABEL_35",
50
+ "36": "LABEL_36",
51
+ "37": "LABEL_37",
52
+ "38": "LABEL_38",
53
+ "39": "LABEL_39",
54
+ "40": "LABEL_40",
55
+ "41": "LABEL_41",
56
+ "42": "LABEL_42",
57
+ "43": "LABEL_43",
58
+ "44": "LABEL_44",
59
+ "45": "LABEL_45",
60
+ "46": "LABEL_46",
61
+ "47": "LABEL_47",
62
+ "48": "LABEL_48",
63
+ "49": "LABEL_49",
64
+ "50": "LABEL_50",
65
+ "51": "LABEL_51",
66
+ "52": "LABEL_52",
67
+ "53": "LABEL_53",
68
+ "54": "LABEL_54",
69
+ "55": "LABEL_55",
70
+ "56": "LABEL_56",
71
+ "57": "LABEL_57",
72
+ "58": "LABEL_58",
73
+ "59": "LABEL_59",
74
+ "60": "LABEL_60",
75
+ "61": "LABEL_61",
76
+ "62": "LABEL_62",
77
+ "63": "LABEL_63",
78
+ "64": "LABEL_64",
79
+ "65": "LABEL_65",
80
+ "66": "LABEL_66",
81
+ "67": "LABEL_67",
82
+ "68": "LABEL_68",
83
+ "69": "LABEL_69",
84
+ "70": "LABEL_70",
85
+ "71": "LABEL_71",
86
+ "72": "LABEL_72",
87
+ "73": "LABEL_73",
88
+ "74": "LABEL_74",
89
+ "75": "LABEL_75",
90
+ "76": "LABEL_76",
91
+ "77": "LABEL_77",
92
+ "78": "LABEL_78",
93
+ "79": "LABEL_79",
94
+ "80": "LABEL_80",
95
+ "81": "LABEL_81",
96
+ "82": "LABEL_82",
97
+ "83": "LABEL_83",
98
+ "84": "LABEL_84",
99
+ "85": "LABEL_85",
100
+ "86": "LABEL_86",
101
+ "87": "LABEL_87",
102
+ "88": "LABEL_88",
103
+ "89": "LABEL_89",
104
+ "90": "LABEL_90",
105
+ "91": "LABEL_91",
106
+ "92": "LABEL_92",
107
+ "93": "LABEL_93",
108
+ "94": "LABEL_94",
109
+ "95": "LABEL_95",
110
+ "96": "LABEL_96",
111
+ "97": "LABEL_97",
112
+ "98": "LABEL_98",
113
+ "99": "LABEL_99",
114
+ "100": "LABEL_100",
115
+ "101": "LABEL_101",
116
+ "102": "LABEL_102",
117
+ "103": "LABEL_103",
118
+ "104": "LABEL_104",
119
+ "105": "LABEL_105",
120
+ "106": "LABEL_106",
121
+ "107": "LABEL_107",
122
+ "108": "LABEL_108",
123
+ "109": "LABEL_109",
124
+ "110": "LABEL_110",
125
+ "111": "LABEL_111",
126
+ "112": "LABEL_112",
127
+ "113": "LABEL_113",
128
+ "114": "LABEL_114",
129
+ "115": "LABEL_115",
130
+ "116": "LABEL_116",
131
+ "117": "LABEL_117",
132
+ "118": "LABEL_118",
133
+ "119": "LABEL_119",
134
+ "120": "LABEL_120",
135
+ "121": "LABEL_121",
136
+ "122": "LABEL_122",
137
+ "123": "LABEL_123",
138
+ "124": "LABEL_124",
139
+ "125": "LABEL_125",
140
+ "126": "LABEL_126",
141
+ "127": "LABEL_127",
142
+ "128": "LABEL_128",
143
+ "129": "LABEL_129",
144
+ "130": "LABEL_130",
145
+ "131": "LABEL_131",
146
+ "132": "LABEL_132",
147
+ "133": "LABEL_133",
148
+ "134": "LABEL_134",
149
+ "135": "LABEL_135",
150
+ "136": "LABEL_136",
151
+ "137": "LABEL_137",
152
+ "138": "LABEL_138",
153
+ "139": "LABEL_139",
154
+ "140": "LABEL_140",
155
+ "141": "LABEL_141",
156
+ "142": "LABEL_142",
157
+ "143": "LABEL_143",
158
+ "144": "LABEL_144",
159
+ "145": "LABEL_145",
160
+ "146": "LABEL_146",
161
+ "147": "LABEL_147",
162
+ "148": "LABEL_148",
163
+ "149": "LABEL_149",
164
+ "150": "LABEL_150",
165
+ "151": "LABEL_151",
166
+ "152": "LABEL_152",
167
+ "153": "LABEL_153",
168
+ "154": "LABEL_154",
169
+ "155": "LABEL_155",
170
+ "156": "LABEL_156",
171
+ "157": "LABEL_157",
172
+ "158": "LABEL_158",
173
+ "159": "LABEL_159",
174
+ "160": "LABEL_160",
175
+ "161": "LABEL_161",
176
+ "162": "LABEL_162",
177
+ "163": "LABEL_163",
178
+ "164": "LABEL_164",
179
+ "165": "LABEL_165",
180
+ "166": "LABEL_166",
181
+ "167": "LABEL_167",
182
+ "168": "LABEL_168",
183
+ "169": "LABEL_169",
184
+ "170": "LABEL_170",
185
+ "171": "LABEL_171",
186
+ "172": "LABEL_172",
187
+ "173": "LABEL_173",
188
+ "174": "LABEL_174",
189
+ "175": "LABEL_175",
190
+ "176": "LABEL_176",
191
+ "177": "LABEL_177",
192
+ "178": "LABEL_178",
193
+ "179": "LABEL_179",
194
+ "180": "LABEL_180",
195
+ "181": "LABEL_181",
196
+ "182": "LABEL_182",
197
+ "183": "LABEL_183",
198
+ "184": "LABEL_184",
199
+ "185": "LABEL_185",
200
+ "186": "LABEL_186",
201
+ "187": "LABEL_187",
202
+ "188": "LABEL_188",
203
+ "189": "LABEL_189",
204
+ "190": "LABEL_190",
205
+ "191": "LABEL_191",
206
+ "192": "LABEL_192",
207
+ "193": "LABEL_193",
208
+ "194": "LABEL_194",
209
+ "195": "LABEL_195",
210
+ "196": "LABEL_196",
211
+ "197": "LABEL_197",
212
+ "198": "LABEL_198",
213
+ "199": "LABEL_199",
214
+ "200": "LABEL_200",
215
+ "201": "LABEL_201",
216
+ "202": "LABEL_202",
217
+ "203": "LABEL_203",
218
+ "204": "LABEL_204",
219
+ "205": "LABEL_205",
220
+ "206": "LABEL_206",
221
+ "207": "LABEL_207",
222
+ "208": "LABEL_208",
223
+ "209": "LABEL_209",
224
+ "210": "LABEL_210",
225
+ "211": "LABEL_211",
226
+ "212": "LABEL_212",
227
+ "213": "LABEL_213",
228
+ "214": "LABEL_214",
229
+ "215": "LABEL_215",
230
+ "216": "LABEL_216",
231
+ "217": "LABEL_217",
232
+ "218": "LABEL_218",
233
+ "219": "LABEL_219",
234
+ "220": "LABEL_220",
235
+ "221": "LABEL_221",
236
+ "222": "LABEL_222",
237
+ "223": "LABEL_223",
238
+ "224": "LABEL_224",
239
+ "225": "LABEL_225",
240
+ "226": "LABEL_226",
241
+ "227": "LABEL_227",
242
+ "228": "LABEL_228",
243
+ "229": "LABEL_229",
244
+ "230": "LABEL_230",
245
+ "231": "LABEL_231",
246
+ "232": "LABEL_232",
247
+ "233": "LABEL_233",
248
+ "234": "LABEL_234",
249
+ "235": "LABEL_235",
250
+ "236": "LABEL_236",
251
+ "237": "LABEL_237",
252
+ "238": "LABEL_238",
253
+ "239": "LABEL_239",
254
+ "240": "LABEL_240",
255
+ "241": "LABEL_241",
256
+ "242": "LABEL_242",
257
+ "243": "LABEL_243",
258
+ "244": "LABEL_244",
259
+ "245": "LABEL_245",
260
+ "246": "LABEL_246",
261
+ "247": "LABEL_247",
262
+ "248": "LABEL_248",
263
+ "249": "LABEL_249",
264
+ "250": "LABEL_250",
265
+ "251": "LABEL_251",
266
+ "252": "LABEL_252",
267
+ "253": "LABEL_253",
268
+ "254": "LABEL_254",
269
+ "255": "LABEL_255",
270
+ "256": "LABEL_256",
271
+ "257": "LABEL_257",
272
+ "258": "LABEL_258",
273
+ "259": "LABEL_259",
274
+ "260": "LABEL_260",
275
+ "261": "LABEL_261",
276
+ "262": "LABEL_262",
277
+ "263": "LABEL_263",
278
+ "264": "LABEL_264",
279
+ "265": "LABEL_265",
280
+ "266": "LABEL_266",
281
+ "267": "LABEL_267",
282
+ "268": "LABEL_268",
283
+ "269": "LABEL_269",
284
+ "270": "LABEL_270",
285
+ "271": "LABEL_271",
286
+ "272": "LABEL_272",
287
+ "273": "LABEL_273",
288
+ "274": "LABEL_274",
289
+ "275": "LABEL_275",
290
+ "276": "LABEL_276",
291
+ "277": "LABEL_277",
292
+ "278": "LABEL_278",
293
+ "279": "LABEL_279",
294
+ "280": "LABEL_280",
295
+ "281": "LABEL_281",
296
+ "282": "LABEL_282",
297
+ "283": "LABEL_283",
298
+ "284": "LABEL_284",
299
+ "285": "LABEL_285",
300
+ "286": "LABEL_286",
301
+ "287": "LABEL_287",
302
+ "288": "LABEL_288",
303
+ "289": "LABEL_289",
304
+ "290": "LABEL_290",
305
+ "291": "LABEL_291",
306
+ "292": "LABEL_292",
307
+ "293": "LABEL_293",
308
+ "294": "LABEL_294",
309
+ "295": "LABEL_295",
310
+ "296": "LABEL_296",
311
+ "297": "LABEL_297",
312
+ "298": "LABEL_298",
313
+ "299": "LABEL_299",
314
+ "300": "LABEL_300",
315
+ "301": "LABEL_301",
316
+ "302": "LABEL_302",
317
+ "303": "LABEL_303",
318
+ "304": "LABEL_304",
319
+ "305": "LABEL_305",
320
+ "306": "LABEL_306",
321
+ "307": "LABEL_307",
322
+ "308": "LABEL_308",
323
+ "309": "LABEL_309",
324
+ "310": "LABEL_310",
325
+ "311": "LABEL_311",
326
+ "312": "LABEL_312",
327
+ "313": "LABEL_313",
328
+ "314": "LABEL_314",
329
+ "315": "LABEL_315",
330
+ "316": "LABEL_316",
331
+ "317": "LABEL_317",
332
+ "318": "LABEL_318",
333
+ "319": "LABEL_319",
334
+ "320": "LABEL_320",
335
+ "321": "LABEL_321",
336
+ "322": "LABEL_322",
337
+ "323": "LABEL_323",
338
+ "324": "LABEL_324",
339
+ "325": "LABEL_325",
340
+ "326": "LABEL_326",
341
+ "327": "LABEL_327",
342
+ "328": "LABEL_328",
343
+ "329": "LABEL_329",
344
+ "330": "LABEL_330",
345
+ "331": "LABEL_331",
346
+ "332": "LABEL_332",
347
+ "333": "LABEL_333",
348
+ "334": "LABEL_334",
349
+ "335": "LABEL_335",
350
+ "336": "LABEL_336",
351
+ "337": "LABEL_337",
352
+ "338": "LABEL_338",
353
+ "339": "LABEL_339",
354
+ "340": "LABEL_340",
355
+ "341": "LABEL_341",
356
+ "342": "LABEL_342",
357
+ "343": "LABEL_343",
358
+ "344": "LABEL_344",
359
+ "345": "LABEL_345",
360
+ "346": "LABEL_346",
361
+ "347": "LABEL_347",
362
+ "348": "LABEL_348",
363
+ "349": "LABEL_349",
364
+ "350": "LABEL_350",
365
+ "351": "LABEL_351",
366
+ "352": "LABEL_352",
367
+ "353": "LABEL_353",
368
+ "354": "LABEL_354",
369
+ "355": "LABEL_355",
370
+ "356": "LABEL_356",
371
+ "357": "LABEL_357",
372
+ "358": "LABEL_358",
373
+ "359": "LABEL_359",
374
+ "360": "LABEL_360",
375
+ "361": "LABEL_361",
376
+ "362": "LABEL_362",
377
+ "363": "LABEL_363",
378
+ "364": "LABEL_364",
379
+ "365": "LABEL_365",
380
+ "366": "LABEL_366",
381
+ "367": "LABEL_367",
382
+ "368": "LABEL_368",
383
+ "369": "LABEL_369",
384
+ "370": "LABEL_370",
385
+ "371": "LABEL_371",
386
+ "372": "LABEL_372",
387
+ "373": "LABEL_373",
388
+ "374": "LABEL_374",
389
+ "375": "LABEL_375",
390
+ "376": "LABEL_376",
391
+ "377": "LABEL_377",
392
+ "378": "LABEL_378",
393
+ "379": "LABEL_379",
394
+ "380": "LABEL_380",
395
+ "381": "LABEL_381",
396
+ "382": "LABEL_382",
397
+ "383": "LABEL_383",
398
+ "384": "LABEL_384",
399
+ "385": "LABEL_385",
400
+ "386": "LABEL_386",
401
+ "387": "LABEL_387",
402
+ "388": "LABEL_388",
403
+ "389": "LABEL_389",
404
+ "390": "LABEL_390"
405
+ },
406
+ "initializer_range": 0.02,
407
+ "intermediate_size": 1536,
408
+ "label2id": {
409
+ "LABEL_0": 0,
410
+ "LABEL_1": 1,
411
+ "LABEL_10": 10,
412
+ "LABEL_100": 100,
413
+ "LABEL_101": 101,
414
+ "LABEL_102": 102,
415
+ "LABEL_103": 103,
416
+ "LABEL_104": 104,
417
+ "LABEL_105": 105,
418
+ "LABEL_106": 106,
419
+ "LABEL_107": 107,
420
+ "LABEL_108": 108,
421
+ "LABEL_109": 109,
422
+ "LABEL_11": 11,
423
+ "LABEL_110": 110,
424
+ "LABEL_111": 111,
425
+ "LABEL_112": 112,
426
+ "LABEL_113": 113,
427
+ "LABEL_114": 114,
428
+ "LABEL_115": 115,
429
+ "LABEL_116": 116,
430
+ "LABEL_117": 117,
431
+ "LABEL_118": 118,
432
+ "LABEL_119": 119,
433
+ "LABEL_12": 12,
434
+ "LABEL_120": 120,
435
+ "LABEL_121": 121,
436
+ "LABEL_122": 122,
437
+ "LABEL_123": 123,
438
+ "LABEL_124": 124,
439
+ "LABEL_125": 125,
440
+ "LABEL_126": 126,
441
+ "LABEL_127": 127,
442
+ "LABEL_128": 128,
443
+ "LABEL_129": 129,
444
+ "LABEL_13": 13,
445
+ "LABEL_130": 130,
446
+ "LABEL_131": 131,
447
+ "LABEL_132": 132,
448
+ "LABEL_133": 133,
449
+ "LABEL_134": 134,
450
+ "LABEL_135": 135,
451
+ "LABEL_136": 136,
452
+ "LABEL_137": 137,
453
+ "LABEL_138": 138,
454
+ "LABEL_139": 139,
455
+ "LABEL_14": 14,
456
+ "LABEL_140": 140,
457
+ "LABEL_141": 141,
458
+ "LABEL_142": 142,
459
+ "LABEL_143": 143,
460
+ "LABEL_144": 144,
461
+ "LABEL_145": 145,
462
+ "LABEL_146": 146,
463
+ "LABEL_147": 147,
464
+ "LABEL_148": 148,
465
+ "LABEL_149": 149,
466
+ "LABEL_15": 15,
467
+ "LABEL_150": 150,
468
+ "LABEL_151": 151,
469
+ "LABEL_152": 152,
470
+ "LABEL_153": 153,
471
+ "LABEL_154": 154,
472
+ "LABEL_155": 155,
473
+ "LABEL_156": 156,
474
+ "LABEL_157": 157,
475
+ "LABEL_158": 158,
476
+ "LABEL_159": 159,
477
+ "LABEL_16": 16,
478
+ "LABEL_160": 160,
479
+ "LABEL_161": 161,
480
+ "LABEL_162": 162,
481
+ "LABEL_163": 163,
482
+ "LABEL_164": 164,
483
+ "LABEL_165": 165,
484
+ "LABEL_166": 166,
485
+ "LABEL_167": 167,
486
+ "LABEL_168": 168,
487
+ "LABEL_169": 169,
488
+ "LABEL_17": 17,
489
+ "LABEL_170": 170,
490
+ "LABEL_171": 171,
491
+ "LABEL_172": 172,
492
+ "LABEL_173": 173,
493
+ "LABEL_174": 174,
494
+ "LABEL_175": 175,
495
+ "LABEL_176": 176,
496
+ "LABEL_177": 177,
497
+ "LABEL_178": 178,
498
+ "LABEL_179": 179,
499
+ "LABEL_18": 18,
500
+ "LABEL_180": 180,
501
+ "LABEL_181": 181,
502
+ "LABEL_182": 182,
503
+ "LABEL_183": 183,
504
+ "LABEL_184": 184,
505
+ "LABEL_185": 185,
506
+ "LABEL_186": 186,
507
+ "LABEL_187": 187,
508
+ "LABEL_188": 188,
509
+ "LABEL_189": 189,
510
+ "LABEL_19": 19,
511
+ "LABEL_190": 190,
512
+ "LABEL_191": 191,
513
+ "LABEL_192": 192,
514
+ "LABEL_193": 193,
515
+ "LABEL_194": 194,
516
+ "LABEL_195": 195,
517
+ "LABEL_196": 196,
518
+ "LABEL_197": 197,
519
+ "LABEL_198": 198,
520
+ "LABEL_199": 199,
521
+ "LABEL_2": 2,
522
+ "LABEL_20": 20,
523
+ "LABEL_200": 200,
524
+ "LABEL_201": 201,
525
+ "LABEL_202": 202,
526
+ "LABEL_203": 203,
527
+ "LABEL_204": 204,
528
+ "LABEL_205": 205,
529
+ "LABEL_206": 206,
530
+ "LABEL_207": 207,
531
+ "LABEL_208": 208,
532
+ "LABEL_209": 209,
533
+ "LABEL_21": 21,
534
+ "LABEL_210": 210,
535
+ "LABEL_211": 211,
536
+ "LABEL_212": 212,
537
+ "LABEL_213": 213,
538
+ "LABEL_214": 214,
539
+ "LABEL_215": 215,
540
+ "LABEL_216": 216,
541
+ "LABEL_217": 217,
542
+ "LABEL_218": 218,
543
+ "LABEL_219": 219,
544
+ "LABEL_22": 22,
545
+ "LABEL_220": 220,
546
+ "LABEL_221": 221,
547
+ "LABEL_222": 222,
548
+ "LABEL_223": 223,
549
+ "LABEL_224": 224,
550
+ "LABEL_225": 225,
551
+ "LABEL_226": 226,
552
+ "LABEL_227": 227,
553
+ "LABEL_228": 228,
554
+ "LABEL_229": 229,
555
+ "LABEL_23": 23,
556
+ "LABEL_230": 230,
557
+ "LABEL_231": 231,
558
+ "LABEL_232": 232,
559
+ "LABEL_233": 233,
560
+ "LABEL_234": 234,
561
+ "LABEL_235": 235,
562
+ "LABEL_236": 236,
563
+ "LABEL_237": 237,
564
+ "LABEL_238": 238,
565
+ "LABEL_239": 239,
566
+ "LABEL_24": 24,
567
+ "LABEL_240": 240,
568
+ "LABEL_241": 241,
569
+ "LABEL_242": 242,
570
+ "LABEL_243": 243,
571
+ "LABEL_244": 244,
572
+ "LABEL_245": 245,
573
+ "LABEL_246": 246,
574
+ "LABEL_247": 247,
575
+ "LABEL_248": 248,
576
+ "LABEL_249": 249,
577
+ "LABEL_25": 25,
578
+ "LABEL_250": 250,
579
+ "LABEL_251": 251,
580
+ "LABEL_252": 252,
581
+ "LABEL_253": 253,
582
+ "LABEL_254": 254,
583
+ "LABEL_255": 255,
584
+ "LABEL_256": 256,
585
+ "LABEL_257": 257,
586
+ "LABEL_258": 258,
587
+ "LABEL_259": 259,
588
+ "LABEL_26": 26,
589
+ "LABEL_260": 260,
590
+ "LABEL_261": 261,
591
+ "LABEL_262": 262,
592
+ "LABEL_263": 263,
593
+ "LABEL_264": 264,
594
+ "LABEL_265": 265,
595
+ "LABEL_266": 266,
596
+ "LABEL_267": 267,
597
+ "LABEL_268": 268,
598
+ "LABEL_269": 269,
599
+ "LABEL_27": 27,
600
+ "LABEL_270": 270,
601
+ "LABEL_271": 271,
602
+ "LABEL_272": 272,
603
+ "LABEL_273": 273,
604
+ "LABEL_274": 274,
605
+ "LABEL_275": 275,
606
+ "LABEL_276": 276,
607
+ "LABEL_277": 277,
608
+ "LABEL_278": 278,
609
+ "LABEL_279": 279,
610
+ "LABEL_28": 28,
611
+ "LABEL_280": 280,
612
+ "LABEL_281": 281,
613
+ "LABEL_282": 282,
614
+ "LABEL_283": 283,
615
+ "LABEL_284": 284,
616
+ "LABEL_285": 285,
617
+ "LABEL_286": 286,
618
+ "LABEL_287": 287,
619
+ "LABEL_288": 288,
620
+ "LABEL_289": 289,
621
+ "LABEL_29": 29,
622
+ "LABEL_290": 290,
623
+ "LABEL_291": 291,
624
+ "LABEL_292": 292,
625
+ "LABEL_293": 293,
626
+ "LABEL_294": 294,
627
+ "LABEL_295": 295,
628
+ "LABEL_296": 296,
629
+ "LABEL_297": 297,
630
+ "LABEL_298": 298,
631
+ "LABEL_299": 299,
632
+ "LABEL_3": 3,
633
+ "LABEL_30": 30,
634
+ "LABEL_300": 300,
635
+ "LABEL_301": 301,
636
+ "LABEL_302": 302,
637
+ "LABEL_303": 303,
638
+ "LABEL_304": 304,
639
+ "LABEL_305": 305,
640
+ "LABEL_306": 306,
641
+ "LABEL_307": 307,
642
+ "LABEL_308": 308,
643
+ "LABEL_309": 309,
644
+ "LABEL_31": 31,
645
+ "LABEL_310": 310,
646
+ "LABEL_311": 311,
647
+ "LABEL_312": 312,
648
+ "LABEL_313": 313,
649
+ "LABEL_314": 314,
650
+ "LABEL_315": 315,
651
+ "LABEL_316": 316,
652
+ "LABEL_317": 317,
653
+ "LABEL_318": 318,
654
+ "LABEL_319": 319,
655
+ "LABEL_32": 32,
656
+ "LABEL_320": 320,
657
+ "LABEL_321": 321,
658
+ "LABEL_322": 322,
659
+ "LABEL_323": 323,
660
+ "LABEL_324": 324,
661
+ "LABEL_325": 325,
662
+ "LABEL_326": 326,
663
+ "LABEL_327": 327,
664
+ "LABEL_328": 328,
665
+ "LABEL_329": 329,
666
+ "LABEL_33": 33,
667
+ "LABEL_330": 330,
668
+ "LABEL_331": 331,
669
+ "LABEL_332": 332,
670
+ "LABEL_333": 333,
671
+ "LABEL_334": 334,
672
+ "LABEL_335": 335,
673
+ "LABEL_336": 336,
674
+ "LABEL_337": 337,
675
+ "LABEL_338": 338,
676
+ "LABEL_339": 339,
677
+ "LABEL_34": 34,
678
+ "LABEL_340": 340,
679
+ "LABEL_341": 341,
680
+ "LABEL_342": 342,
681
+ "LABEL_343": 343,
682
+ "LABEL_344": 344,
683
+ "LABEL_345": 345,
684
+ "LABEL_346": 346,
685
+ "LABEL_347": 347,
686
+ "LABEL_348": 348,
687
+ "LABEL_349": 349,
688
+ "LABEL_35": 35,
689
+ "LABEL_350": 350,
690
+ "LABEL_351": 351,
691
+ "LABEL_352": 352,
692
+ "LABEL_353": 353,
693
+ "LABEL_354": 354,
694
+ "LABEL_355": 355,
695
+ "LABEL_356": 356,
696
+ "LABEL_357": 357,
697
+ "LABEL_358": 358,
698
+ "LABEL_359": 359,
699
+ "LABEL_36": 36,
700
+ "LABEL_360": 360,
701
+ "LABEL_361": 361,
702
+ "LABEL_362": 362,
703
+ "LABEL_363": 363,
704
+ "LABEL_364": 364,
705
+ "LABEL_365": 365,
706
+ "LABEL_366": 366,
707
+ "LABEL_367": 367,
708
+ "LABEL_368": 368,
709
+ "LABEL_369": 369,
710
+ "LABEL_37": 37,
711
+ "LABEL_370": 370,
712
+ "LABEL_371": 371,
713
+ "LABEL_372": 372,
714
+ "LABEL_373": 373,
715
+ "LABEL_374": 374,
716
+ "LABEL_375": 375,
717
+ "LABEL_376": 376,
718
+ "LABEL_377": 377,
719
+ "LABEL_378": 378,
720
+ "LABEL_379": 379,
721
+ "LABEL_38": 38,
722
+ "LABEL_380": 380,
723
+ "LABEL_381": 381,
724
+ "LABEL_382": 382,
725
+ "LABEL_383": 383,
726
+ "LABEL_384": 384,
727
+ "LABEL_385": 385,
728
+ "LABEL_386": 386,
729
+ "LABEL_387": 387,
730
+ "LABEL_388": 388,
731
+ "LABEL_389": 389,
732
+ "LABEL_39": 39,
733
+ "LABEL_390": 390,
734
+ "LABEL_4": 4,
735
+ "LABEL_40": 40,
736
+ "LABEL_41": 41,
737
+ "LABEL_42": 42,
738
+ "LABEL_43": 43,
739
+ "LABEL_44": 44,
740
+ "LABEL_45": 45,
741
+ "LABEL_46": 46,
742
+ "LABEL_47": 47,
743
+ "LABEL_48": 48,
744
+ "LABEL_49": 49,
745
+ "LABEL_5": 5,
746
+ "LABEL_50": 50,
747
+ "LABEL_51": 51,
748
+ "LABEL_52": 52,
749
+ "LABEL_53": 53,
750
+ "LABEL_54": 54,
751
+ "LABEL_55": 55,
752
+ "LABEL_56": 56,
753
+ "LABEL_57": 57,
754
+ "LABEL_58": 58,
755
+ "LABEL_59": 59,
756
+ "LABEL_6": 6,
757
+ "LABEL_60": 60,
758
+ "LABEL_61": 61,
759
+ "LABEL_62": 62,
760
+ "LABEL_63": 63,
761
+ "LABEL_64": 64,
762
+ "LABEL_65": 65,
763
+ "LABEL_66": 66,
764
+ "LABEL_67": 67,
765
+ "LABEL_68": 68,
766
+ "LABEL_69": 69,
767
+ "LABEL_7": 7,
768
+ "LABEL_70": 70,
769
+ "LABEL_71": 71,
770
+ "LABEL_72": 72,
771
+ "LABEL_73": 73,
772
+ "LABEL_74": 74,
773
+ "LABEL_75": 75,
774
+ "LABEL_76": 76,
775
+ "LABEL_77": 77,
776
+ "LABEL_78": 78,
777
+ "LABEL_79": 79,
778
+ "LABEL_8": 8,
779
+ "LABEL_80": 80,
780
+ "LABEL_81": 81,
781
+ "LABEL_82": 82,
782
+ "LABEL_83": 83,
783
+ "LABEL_84": 84,
784
+ "LABEL_85": 85,
785
+ "LABEL_86": 86,
786
+ "LABEL_87": 87,
787
+ "LABEL_88": 88,
788
+ "LABEL_89": 89,
789
+ "LABEL_9": 9,
790
+ "LABEL_90": 90,
791
+ "LABEL_91": 91,
792
+ "LABEL_92": 92,
793
+ "LABEL_93": 93,
794
+ "LABEL_94": 94,
795
+ "LABEL_95": 95,
796
+ "LABEL_96": 96,
797
+ "LABEL_97": 97,
798
+ "LABEL_98": 98,
799
+ "LABEL_99": 99
800
+ },
801
+ "layer_norm_eps": 1e-12,
802
+ "max_position_embeddings": 512,
803
+ "model_type": "bert",
804
+ "num_attention_heads": 12,
805
+ "num_hidden_layers": 6,
806
+ "num_relation_heads": 32,
807
+ "pad_token_id": 0,
808
+ "pooler_fc_size": 768,
809
+ "pooler_num_attention_heads": 12,
810
+ "pooler_num_fc_layers": 3,
811
+ "pooler_size_per_head": 128,
812
+ "pooler_type": "first_token_transform",
813
+ "position_embedding_type": "absolute",
814
+ "torch_dtype": "float32",
815
+ "transformers_version": "4.44.1",
816
+ "type_vocab_size": 2,
817
+ "use_cache": true,
818
+ "vocab_size": 21128
819
+ }
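The tail of config.json above is the transformers-style export config for the polyphone classifier: a 6-layer Chinese BERT (12 attention heads, vocab_size 21128) whose label2id table carries one LABEL_* entry per pronunciation class (the classes themselves are listed in polydict.json further down). A minimal inspection sketch follows; reading the files from diffrhythm/g2p/sources/g2p_chinese_model/ and serving the exported model with onnxruntime are assumptions made for illustration, not something this diff states.

```python
# Minimal sketch: inspect the polyphone-classifier config and ONNX export.
# Assumptions: files are read relative to the repo root, and the .onnx model
# is consumed with onnxruntime (the input names depend on how it was exported).
import json
import onnxruntime as ort

MODEL_DIR = "diffrhythm/g2p/sources/g2p_chinese_model"

with open(f"{MODEL_DIR}/config.json", encoding="utf-8") as f:
    cfg = json.load(f)

print(cfg["model_type"], cfg["num_hidden_layers"], cfg["vocab_size"])  # bert 6 21128
print(len(cfg["label2id"]))  # number of LABEL_* pronunciation classes

sess = ort.InferenceSession(f"{MODEL_DIR}/poly_bert_model.onnx")
print([inp.name for inp in sess.get_inputs()])  # expected input tensors
```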
diffrhythm/g2p/sources/g2p_chinese_model/poly_bert_model.onnx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8765d835ffdf9811c832d4dc7b6a552757aa8615c01d1184db716a50c20aebbc
3
+ size 76583333
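The .onnx weights themselves are stored via Git LFS, so what is committed here is only the pointer file (spec version, SHA-256 oid, byte size). After cloning with LFS enabled, the downloaded file can be checked against this pointer; the sketch below is a generic stdlib check, not a utility that ships with this repo.

```python
# Verify a Git LFS-downloaded file against the oid/size recorded in its pointer.
import hashlib
import os

path = "diffrhythm/g2p/sources/g2p_chinese_model/poly_bert_model.onnx"
expected_oid = "8765d835ffdf9811c832d4dc7b6a552757aa8615c01d1184db716a50c20aebbc"
expected_size = 76583333

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch: still an LFS pointer?"
assert sha256.hexdigest() == expected_oid, "sha256 mismatch"
print("poly_bert_model.onnx matches its LFS pointer")
```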
diffrhythm/g2p/sources/g2p_chinese_model/polychar.txt ADDED
@@ -0,0 +1,159 @@
1
+
2
+
3
+
4
+
5
+
6
+
7
+
8
+
9
+
10
+
11
+
12
+
13
+
14
+
15
+ 便
16
+
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
53
+ 宿
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
diffrhythm/g2p/sources/g2p_chinese_model/polydict.json ADDED
@@ -0,0 +1,393 @@
1
+ {
2
+ "1": "丧{sang1}",
3
+ "2": "丧{sang4}",
4
+ "3": "中{zhong1}",
5
+ "4": "中{zhong4}",
6
+ "5": "为{wei2}",
7
+ "6": "为{wei4}",
8
+ "7": "乌{wu1}",
9
+ "8": "乌{wu4}",
10
+ "9": "乐{lao4}",
11
+ "10": "乐{le4}",
12
+ "11": "乐{le5}",
13
+ "12": "乐{yao4}",
14
+ "13": "乐{yve4}",
15
+ "14": "了{le5}",
16
+ "15": "了{liao3}",
17
+ "16": "了{liao5}",
18
+ "17": "什{shen2}",
19
+ "18": "什{shi2}",
20
+ "19": "仔{zai3}",
21
+ "20": "仔{zai5}",
22
+ "21": "仔{zi3}",
23
+ "22": "仔{zi5}",
24
+ "23": "令{ling2}",
25
+ "24": "令{ling4}",
26
+ "25": "任{ren2}",
27
+ "26": "任{ren4}",
28
+ "27": "会{hui4}",
29
+ "28": "会{hui5}",
30
+ "29": "会{kuai4}",
31
+ "30": "传{chuan2}",
32
+ "31": "传{zhuan4}",
33
+ "32": "佛{fo2}",
34
+ "33": "佛{fu2}",
35
+ "34": "供{gong1}",
36
+ "35": "供{gong4}",
37
+ "36": "便{bian4}",
38
+ "37": "便{pian2}",
39
+ "38": "倒{dao3}",
40
+ "39": "倒{dao4}",
41
+ "40": "假{jia3}",
42
+ "41": "假{jia4}",
43
+ "42": "兴{xing1}",
44
+ "43": "兴{xing4}",
45
+ "44": "冠{guan1}",
46
+ "45": "冠{guan4}",
47
+ "46": "冲{chong1}",
48
+ "47": "冲{chong4}",
49
+ "48": "几{ji1}",
50
+ "49": "几{ji2}",
51
+ "50": "几{ji3}",
52
+ "51": "分{fen1}",
53
+ "52": "分{fen4}",
54
+ "53": "分{fen5}",
55
+ "54": "切{qie1}",
56
+ "55": "切{qie4}",
57
+ "56": "划{hua2}",
58
+ "57": "划{hua4}",
59
+ "58": "划{hua5}",
60
+ "59": "创{chuang1}",
61
+ "60": "创{chuang4}",
62
+ "61": "剥{bao1}",
63
+ "62": "剥{bo1}",
64
+ "63": "勒{le4}",
65
+ "64": "勒{le5}",
66
+ "65": "勒{lei1}",
67
+ "66": "区{ou1}",
68
+ "67": "区{qu1}",
69
+ "68": "华{hua2}",
70
+ "69": "华{hua4}",
71
+ "70": "单{chan2}",
72
+ "71": "单{dan1}",
73
+ "72": "单{shan4}",
74
+ "73": "卜{bo5}",
75
+ "74": "卜{bu3}",
76
+ "75": "占{zhan1}",
77
+ "76": "占{zhan4}",
78
+ "77": "卡{ka2}",
79
+ "78": "卡{ka3}",
80
+ "79": "卡{qia3}",
81
+ "80": "卷{jvan3}",
82
+ "81": "卷{jvan4}",
83
+ "82": "厦{sha4}",
84
+ "83": "厦{xia4}",
85
+ "84": "参{can1}",
86
+ "85": "参{cen1}",
87
+ "86": "参{shen1}",
88
+ "87": "发{fa1}",
89
+ "88": "发{fa4}",
90
+ "89": "发{fa5}",
91
+ "90": "只{zhi1}",
92
+ "91": "只{zhi3}",
93
+ "92": "号{hao2}",
94
+ "93": "号{hao4}",
95
+ "94": "号{hao5}",
96
+ "95": "同{tong2}",
97
+ "96": "同{tong4}",
98
+ "97": "同{tong5}",
99
+ "98": "吐{tu2}",
100
+ "99": "吐{tu3}",
101
+ "100": "吐{tu4}",
102
+ "101": "和{he2}",
103
+ "102": "和{he4}",
104
+ "103": "和{he5}",
105
+ "104": "和{huo2}",
106
+ "105": "和{huo4}",
107
+ "106": "和{huo5}",
108
+ "107": "喝{he1}",
109
+ "108": "喝{he4}",
110
+ "109": "圈{jvan4}",
111
+ "110": "圈{qvan1}",
112
+ "111": "圈{qvan5}",
113
+ "112": "地{de5}",
114
+ "113": "地{di4}",
115
+ "114": "地{di5}",
116
+ "115": "塞{sai1}",
117
+ "116": "塞{sai2}",
118
+ "117": "塞{sai4}",
119
+ "118": "塞{se4}",
120
+ "119": "壳{ke2}",
121
+ "120": "壳{qiao4}",
122
+ "121": "处{chu3}",
123
+ "122": "处{chu4}",
124
+ "123": "奇{ji1}",
125
+ "124": "奇{qi2}",
126
+ "125": "奔{ben1}",
127
+ "126": "奔{ben4}",
128
+ "127": "好{hao3}",
129
+ "128": "好{hao4}",
130
+ "129": "好{hao5}",
131
+ "130": "宁{ning2}",
132
+ "131": "宁{ning4}",
133
+ "132": "宁{ning5}",
134
+ "133": "宿{su4}",
135
+ "134": "宿{xiu3}",
136
+ "135": "宿{xiu4}",
137
+ "136": "将{jiang1}",
138
+ "137": "将{jiang4}",
139
+ "138": "少{shao3}",
140
+ "139": "少{shao4}",
141
+ "140": "尽{jin3}",
142
+ "141": "尽{jin4}",
143
+ "142": "岗{gang1}",
144
+ "143": "岗{gang3}",
145
+ "144": "差{cha1}",
146
+ "145": "差{cha4}",
147
+ "146": "差{chai1}",
148
+ "147": "差{ci1}",
149
+ "148": "巷{hang4}",
150
+ "149": "巷{xiang4}",
151
+ "150": "帖{tie1}",
152
+ "151": "帖{tie3}",
153
+ "152": "帖{tie4}",
154
+ "153": "干{gan1}",
155
+ "154": "干{gan4}",
156
+ "155": "应{ying1}",
157
+ "156": "应{ying4}",
158
+ "157": "应{ying5}",
159
+ "158": "度{du4}",
160
+ "159": "度{du5}",
161
+ "160": "度{duo2}",
162
+ "161": "弹{dan4}",
163
+ "162": "弹{tan2}",
164
+ "163": "弹{tan5}",
165
+ "164": "强{jiang4}",
166
+ "165": "强{qiang2}",
167
+ "166": "强{qiang3}",
168
+ "167": "当{dang1}",
169
+ "168": "当{dang4}",
170
+ "169": "当{dang5}",
171
+ "170": "待{dai1}",
172
+ "171": "待{dai4}",
173
+ "172": "得{de2}",
174
+ "173": "得{de5}",
175
+ "174": "得{dei3}",
176
+ "175": "得{dei5}",
177
+ "176": "恶{e3}",
178
+ "177": "恶{e4}",
179
+ "178": "恶{wu4}",
180
+ "179": "扁{bian3}",
181
+ "180": "扁{pian1}",
182
+ "181": "扇{shan1}",
183
+ "182": "扇{shan4}",
184
+ "183": "扎{za1}",
185
+ "184": "扎{zha1}",
186
+ "185": "扎{zha2}",
187
+ "186": "扫{sao3}",
188
+ "187": "扫{sao4}",
189
+ "188": "担{dan1}",
190
+ "189": "担{dan4}",
191
+ "190": "担{dan5}",
192
+ "191": "挑{tiao1}",
193
+ "192": "挑{tiao3}",
194
+ "193": "据{jv1}",
195
+ "194": "据{jv4}",
196
+ "195": "撒{sa1}",
197
+ "196": "撒{sa3}",
198
+ "197": "撒{sa5}",
199
+ "198": "教{jiao1}",
200
+ "199": "教{jiao4}",
201
+ "200": "散{san3}",
202
+ "201": "散{san4}",
203
+ "202": "散{san5}",
204
+ "203": "数{shu3}",
205
+ "204": "数{shu4}",
206
+ "205": "数{shu5}",
207
+ "206": "斗{dou3}",
208
+ "207": "斗{dou4}",
209
+ "208": "晃{huang3}",
210
+ "209": "曝{bao4}",
211
+ "210": "曲{qu1}",
212
+ "211": "曲{qu3}",
213
+ "212": "更{geng1}",
214
+ "213": "更{geng4}",
215
+ "214": "曾{ceng1}",
216
+ "215": "曾{ceng2}",
217
+ "216": "曾{zeng1}",
218
+ "217": "朝{chao2}",
219
+ "218": "朝{zhao1}",
220
+ "219": "朴{piao2}",
221
+ "220": "朴{pu2}",
222
+ "221": "朴{pu3}",
223
+ "222": "杆{gan1}",
224
+ "223": "杆{gan3}",
225
+ "224": "查{cha2}",
226
+ "225": "查{zha1}",
227
+ "226": "校{jiao4}",
228
+ "227": "校{xiao4}",
229
+ "228": "模{mo2}",
230
+ "229": "模{mu2}",
231
+ "230": "横{heng2}",
232
+ "231": "横{heng4}",
233
+ "232": "没{mei2}",
234
+ "233": "没{mo4}",
235
+ "234": "泡{pao1}",
236
+ "235": "泡{pao4}",
237
+ "236": "泡{pao5}",
238
+ "237": "济{ji3}",
239
+ "238": "济{ji4}",
240
+ "239": "混{hun2}",
241
+ "240": "混{hun3}",
242
+ "241": "混{hun4}",
243
+ "242": "混{hun5}",
244
+ "243": "漂{piao1}",
245
+ "244": "漂{piao3}",
246
+ "245": "漂{piao4}",
247
+ "246": "炸{zha2}",
248
+ "247": "炸{zha4}",
249
+ "248": "熟{shou2}",
250
+ "249": "熟{shu2}",
251
+ "250": "燕{yan1}",
252
+ "251": "燕{yan4}",
253
+ "252": "片{pian1}",
254
+ "253": "片{pian4}",
255
+ "254": "率{lv4}",
256
+ "255": "率{shuai4}",
257
+ "256": "畜{chu4}",
258
+ "257": "畜{xu4}",
259
+ "258": "的{de5}",
260
+ "259": "的{di1}",
261
+ "260": "的{di2}",
262
+ "261": "的{di4}",
263
+ "262": "的{di5}",
264
+ "263": "盛{cheng2}",
265
+ "264": "盛{sheng4}",
266
+ "265": "相{xiang1}",
267
+ "266": "相{xiang4}",
268
+ "267": "相{xiang5}",
269
+ "268": "省{sheng3}",
270
+ "269": "省{xing3}",
271
+ "270": "看{kan1}",
272
+ "271": "看{kan4}",
273
+ "272": "看{kan5}",
274
+ "273": "着{zhao1}",
275
+ "274": "着{zhao2}",
276
+ "275": "着{zhao5}",
277
+ "276": "着{zhe5}",
278
+ "277": "着{zhuo2}",
279
+ "278": "着{zhuo5}",
280
+ "279": "矫{jiao3}",
281
+ "280": "禁{jin1}",
282
+ "281": "禁{jin4}",
283
+ "282": "种{zhong3}",
284
+ "283": "种{zhong4}",
285
+ "284": "称{chen4}",
286
+ "285": "称{cheng1}",
287
+ "286": "空{kong1}",
288
+ "287": "空{kong4}",
289
+ "288": "答{da1}",
290
+ "289": "答{da2}",
291
+ "290": "粘{nian2}",
292
+ "291": "粘{zhan1}",
293
+ "292": "糊{hu2}",
294
+ "293": "糊{hu5}",
295
+ "294": "系{ji4}",
296
+ "295": "系{xi4}",
297
+ "296": "系{xi5}",
298
+ "297": "累{lei2}",
299
+ "298": "累{lei3}",
300
+ "299": "累{lei4}",
301
+ "300": "累{lei5}",
302
+ "301": "纤{qian4}",
303
+ "302": "纤{xian1}",
304
+ "303": "结{jie1}",
305
+ "304": "结{jie2}",
306
+ "305": "结{jie5}",
307
+ "306": "给{gei3}",
308
+ "307": "给{gei5}",
309
+ "308": "给{ji3}",
310
+ "309": "缝{feng2}",
311
+ "310": "缝{feng4}",
312
+ "311": "缝{feng5}",
313
+ "312": "肖{xiao1}",
314
+ "313": "肖{xiao4}",
315
+ "314": "背{bei1}",
316
+ "315": "背{bei4}",
317
+ "316": "脏{zang1}",
318
+ "317": "脏{zang4}",
319
+ "318": "舍{she3}",
320
+ "319": "舍{she4}",
321
+ "320": "色{se4}",
322
+ "321": "色{shai3}",
323
+ "322": "落{lao4}",
324
+ "323": "落{luo4}",
325
+ "324": "蒙{meng1}",
326
+ "325": "蒙{meng2}",
327
+ "326": "蒙{meng3}",
328
+ "327": "薄{bao2}",
329
+ "328": "薄{bo2}",
330
+ "329": "薄{bo4}",
331
+ "330": "藏{cang2}",
332
+ "331": "藏{zang4}",
333
+ "332": "血{xie3}",
334
+ "333": "血{xue4}",
335
+ "334": "行{hang2}",
336
+ "335": "行{hang5}",
337
+ "336": "行{heng5}",
338
+ "337": "行{xing2}",
339
+ "338": "行{xing4}",
340
+ "339": "要{yao1}",
341
+ "340": "要{yao4}",
342
+ "341": "观{guan1}",
343
+ "342": "观{guan4}",
344
+ "343": "觉{jiao4}",
345
+ "344": "觉{jiao5}",
346
+ "345": "觉{jve2}",
347
+ "346": "角{jiao3}",
348
+ "347": "角{jve2}",
349
+ "348": "解{jie3}",
350
+ "349": "解{jie4}",
351
+ "350": "解{xie4}",
352
+ "351": "说{shui4}",
353
+ "352": "说{shuo1}",
354
+ "353": "调{diao4}",
355
+ "354": "调{tiao2}",
356
+ "355": "踏{ta1}",
357
+ "356": "踏{ta4}",
358
+ "357": "车{che1}",
359
+ "358": "车{jv1}",
360
+ "359": "转{zhuan3}",
361
+ "360": "转{zhuan4}",
362
+ "361": "载{zai3}",
363
+ "362": "载{zai4}",
364
+ "363": "还{hai2}",
365
+ "364": "还{huan2}",
366
+ "365": "遂{sui2}",
367
+ "366": "遂{sui4}",
368
+ "367": "都{dou1}",
369
+ "368": "都{du1}",
370
+ "369": "重{chong2}",
371
+ "370": "重{zhong4}",
372
+ "371": "量{liang2}",
373
+ "372": "量{liang4}",
374
+ "373": "量{liang5}",
375
+ "374": "钻{zuan1}",
376
+ "375": "钻{zuan4}",
377
+ "376": "铺{pu1}",
378
+ "377": "铺{pu4}",
379
+ "378": "长{chang2}",
380
+ "379": "长{chang3}",
381
+ "380": "长{zhang3}",
382
+ "381": "间{jian1}",
383
+ "382": "间{jian4}",
384
+ "383": "降{jiang4}",
385
+ "384": "降{xiang2}",
386
+ "385": "难{nan2}",
387
+ "386": "难{nan4}",
388
+ "387": "难{nan5}",
389
+ "388": "露{lou4}",
390
+ "389": "露{lu4}",
391
+ "390": "鲜{xian1}",
392
+ "391": "鲜{xian3}"
393
+ }
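polydict.json maps a class index to a "字{pinyin}" string, i.e. one specific pronunciation of a polyphonic character, with tone 5 marking the neutral tone and v standing in for ü (jv, qvan, yve, ...). polydict_r.json, added next, is simply the inverse table used when encoding targets. A minimal decoding sketch follows; exactly how the classifier's LABEL_k indices line up with these keys (the keys here start at "1" while the config's labels start at LABEL_0) is an assumption to confirm against chinese_model_g2p.py, not something the files themselves state.

```python
# Minimal sketch: turn a polyphone-classifier prediction into a pinyin syllable.
# Assumption: label index k corresponds to key str(k) in polydict.json; the
# off-by-one between LABEL_0 and key "1" must be checked in chinese_model_g2p.py.
import json
import re

MODEL_DIR = "diffrhythm/g2p/sources/g2p_chinese_model"

with open(f"{MODEL_DIR}/polydict.json", encoding="utf-8") as f:
    polydict = json.load(f)  # {"1": "丧{sang1}", "2": "丧{sang4}", ...}

def decode_label(label_id: int) -> tuple[str, str]:
    """Split an entry like '中{zhong4}' into (character, pinyin)."""
    entry = polydict[str(label_id)]
    char, pinyin = re.match(r"(.+)\{(.+)\}", entry).groups()
    return char, pinyin

print(decode_label(4))  # ('中', 'zhong4') under the assumed indexing
```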
diffrhythm/g2p/sources/g2p_chinese_model/polydict_r.json ADDED
@@ -0,0 +1,393 @@
1
+ {
2
+ "丧{sang1}": 1,
3
+ "丧{sang4}": 2,
4
+ "中{zhong1}": 3,
5
+ "中{zhong4}": 4,
6
+ "为{wei2}": 5,
7
+ "为{wei4}": 6,
8
+ "乌{wu1}": 7,
9
+ "乌{wu4}": 8,
10
+ "乐{lao4}": 9,
11
+ "乐{le4}": 10,
12
+ "乐{le5}": 11,
13
+ "乐{yao4}": 12,
14
+ "乐{yve4}": 13,
15
+ "了{le5}": 14,
16
+ "了{liao3}": 15,
17
+ "了{liao5}": 16,
18
+ "什{shen2}": 17,
19
+ "什{shi2}": 18,
20
+ "仔{zai3}": 19,
21
+ "仔{zai5}": 20,
22
+ "仔{zi3}": 21,
23
+ "仔{zi5}": 22,
24
+ "令{ling2}": 23,
25
+ "令{ling4}": 24,
26
+ "任{ren2}": 25,
27
+ "任{ren4}": 26,
28
+ "会{hui4}": 27,
29
+ "会{hui5}": 28,
30
+ "会{kuai4}": 29,
31
+ "传{chuan2}": 30,
32
+ "传{zhuan4}": 31,
33
+ "佛{fo2}": 32,
34
+ "佛{fu2}": 33,
35
+ "供{gong1}": 34,
36
+ "供{gong4}": 35,
37
+ "便{bian4}": 36,
38
+ "便{pian2}": 37,
39
+ "倒{dao3}": 38,
40
+ "倒{dao4}": 39,
41
+ "假{jia3}": 40,
42
+ "假{jia4}": 41,
43
+ "兴{xing1}": 42,
44
+ "兴{xing4}": 43,
45
+ "冠{guan1}": 44,
46
+ "冠{guan4}": 45,
47
+ "冲{chong1}": 46,
48
+ "冲{chong4}": 47,
49
+ "几{ji1}": 48,
50
+ "几{ji2}": 49,
51
+ "几{ji3}": 50,
52
+ "分{fen1}": 51,
53
+ "分{fen4}": 52,
54
+ "分{fen5}": 53,
55
+ "切{qie1}": 54,
56
+ "切{qie4}": 55,
57
+ "划{hua2}": 56,
58
+ "划{hua4}": 57,
59
+ "划{hua5}": 58,
60
+ "创{chuang1}": 59,
61
+ "创{chuang4}": 60,
62
+ "剥{bao1}": 61,
63
+ "剥{bo1}": 62,
64
+ "勒{le4}": 63,
65
+ "勒{le5}": 64,
66
+ "勒{lei1}": 65,
67
+ "区{ou1}": 66,
68
+ "区{qu1}": 67,
69
+ "华{hua2}": 68,
70
+ "华{hua4}": 69,
71
+ "单{chan2}": 70,
72
+ "单{dan1}": 71,
73
+ "单{shan4}": 72,
74
+ "卜{bo5}": 73,
75
+ "卜{bu3}": 74,
76
+ "占{zhan1}": 75,
77
+ "占{zhan4}": 76,
78
+ "卡{ka2}": 77,
79
+ "卡{ka3}": 78,
80
+ "卡{qia3}": 79,
81
+ "卷{jvan3}": 80,
82
+ "卷{jvan4}": 81,
83
+ "厦{sha4}": 82,
84
+ "厦{xia4}": 83,
85
+ "参{can1}": 84,
86
+ "参{cen1}": 85,
87
+ "参{shen1}": 86,
88
+ "发{fa1}": 87,
89
+ "发{fa4}": 88,
90
+ "发{fa5}": 89,
91
+ "只{zhi1}": 90,
92
+ "只{zhi3}": 91,
93
+ "号{hao2}": 92,
94
+ "号{hao4}": 93,
95
+ "号{hao5}": 94,
96
+ "同{tong2}": 95,
97
+ "同{tong4}": 96,
98
+ "同{tong5}": 97,
99
+ "吐{tu2}": 98,
100
+ "吐{tu3}": 99,
101
+ "吐{tu4}": 100,
102
+ "和{he2}": 101,
103
+ "和{he4}": 102,
104
+ "和{he5}": 103,
105
+ "和{huo2}": 104,
106
+ "和{huo4}": 105,
107
+ "和{huo5}": 106,
108
+ "喝{he1}": 107,
109
+ "喝{he4}": 108,
110
+ "圈{jvan4}": 109,
111
+ "圈{qvan1}": 110,
112
+ "圈{qvan5}": 111,
113
+ "地{de5}": 112,
114
+ "地{di4}": 113,
115
+ "地{di5}": 114,
116
+ "塞{sai1}": 115,
117
+ "塞{sai2}": 116,
118
+ "塞{sai4}": 117,
119
+ "塞{se4}": 118,
120
+ "壳{ke2}": 119,
121
+ "壳{qiao4}": 120,
122
+ "处{chu3}": 121,
123
+ "处{chu4}": 122,
124
+ "奇{ji1}": 123,
125
+ "奇{qi2}": 124,
126
+ "奔{ben1}": 125,
127
+ "奔{ben4}": 126,
128
+ "好{hao3}": 127,
129
+ "好{hao4}": 128,
130
+ "好{hao5}": 129,
131
+ "宁{ning2}": 130,
132
+ "宁{ning4}": 131,
133
+ "宁{ning5}": 132,
134
+ "宿{su4}": 133,
135
+ "宿{xiu3}": 134,
136
+ "宿{xiu4}": 135,
137
+ "将{jiang1}": 136,
138
+ "将{jiang4}": 137,
139
+ "少{shao3}": 138,
140
+ "少{shao4}": 139,
141
+ "尽{jin3}": 140,
142
+ "尽{jin4}": 141,
143
+ "岗{gang1}": 142,
144
+ "岗{gang3}": 143,
145
+ "差{cha1}": 144,
146
+ "差{cha4}": 145,
147
+ "差{chai1}": 146,
148
+ "差{ci1}": 147,
149
+ "巷{hang4}": 148,
150
+ "巷{xiang4}": 149,
151
+ "帖{tie1}": 150,
152
+ "帖{tie3}": 151,
153
+ "帖{tie4}": 152,
154
+ "干{gan1}": 153,
155
+ "干{gan4}": 154,
156
+ "应{ying1}": 155,
157
+ "应{ying4}": 156,
158
+ "应{ying5}": 157,
159
+ "度{du4}": 158,
160
+ "度{du5}": 159,
161
+ "度{duo2}": 160,
162
+ "弹{dan4}": 161,
163
+ "弹{tan2}": 162,
164
+ "弹{tan5}": 163,
165
+ "强{jiang4}": 164,
166
+ "强{qiang2}": 165,
167
+ "强{qiang3}": 166,
168
+ "当{dang1}": 167,
169
+ "当{dang4}": 168,
170
+ "当{dang5}": 169,
171
+ "待{dai1}": 170,
172
+ "待{dai4}": 171,
173
+ "得{de2}": 172,
174
+ "得{de5}": 173,
175
+ "得{dei3}": 174,
176
+ "得{dei5}": 175,
177
+ "恶{e3}": 176,
178
+ "恶{e4}": 177,
179
+ "恶{wu4}": 178,
180
+ "扁{bian3}": 179,
181
+ "扁{pian1}": 180,
182
+ "扇{shan1}": 181,
183
+ "扇{shan4}": 182,
184
+ "扎{za1}": 183,
185
+ "扎{zha1}": 184,
186
+ "扎{zha2}": 185,
187
+ "扫{sao3}": 186,
188
+ "扫{sao4}": 187,
189
+ "担{dan1}": 188,
190
+ "担{dan4}": 189,
191
+ "担{dan5}": 190,
192
+ "挑{tiao1}": 191,
193
+ "挑{tiao3}": 192,
194
+ "据{jv1}": 193,
195
+ "据{jv4}": 194,
196
+ "撒{sa1}": 195,
197
+ "撒{sa3}": 196,
198
+ "撒{sa5}": 197,
199
+ "教{jiao1}": 198,
200
+ "教{jiao4}": 199,
201
+ "散{san3}": 200,
202
+ "散{san4}": 201,
203
+ "散{san5}": 202,
204
+ "数{shu3}": 203,
205
+ "数{shu4}": 204,
206
+ "数{shu5}": 205,
207
+ "斗{dou3}": 206,
208
+ "斗{dou4}": 207,
209
+ "晃{huang3}": 208,
210
+ "曝{bao4}": 209,
211
+ "曲{qu1}": 210,
212
+ "曲{qu3}": 211,
213
+ "更{geng1}": 212,
214
+ "更{geng4}": 213,
215
+ "曾{ceng1}": 214,
216
+ "曾{ceng2}": 215,
217
+ "曾{zeng1}": 216,
218
+ "朝{chao2}": 217,
219
+ "朝{zhao1}": 218,
220
+ "朴{piao2}": 219,
221
+ "朴{pu2}": 220,
222
+ "朴{pu3}": 221,
223
+ "杆{gan1}": 222,
224
+ "杆{gan3}": 223,
225
+ "查{cha2}": 224,
226
+ "查{zha1}": 225,
227
+ "校{jiao4}": 226,
228
+ "校{xiao4}": 227,
229
+ "模{mo2}": 228,
230
+ "模{mu2}": 229,
231
+ "横{heng2}": 230,
232
+ "横{heng4}": 231,
233
+ "没{mei2}": 232,
234
+ "没{mo4}": 233,
235
+ "泡{pao1}": 234,
236
+ "泡{pao4}": 235,
237
+ "泡{pao5}": 236,
238
+ "济{ji3}": 237,
239
+ "济{ji4}": 238,
240
+ "混{hun2}": 239,
241
+ "混{hun3}": 240,
242
+ "混{hun4}": 241,
243
+ "混{hun5}": 242,
244
+ "漂{piao1}": 243,
245
+ "漂{piao3}": 244,
246
+ "漂{piao4}": 245,
247
+ "炸{zha2}": 246,
248
+ "炸{zha4}": 247,
249
+ "熟{shou2}": 248,
250
+ "熟{shu2}": 249,
251
+ "燕{yan1}": 250,
252
+ "燕{yan4}": 251,
253
+ "片{pian1}": 252,
254
+ "片{pian4}": 253,
255
+ "率{lv4}": 254,
256
+ "率{shuai4}": 255,
257
+ "畜{chu4}": 256,
258
+ "畜{xu4}": 257,
259
+ "的{de5}": 258,
260
+ "的{di1}": 259,
261
+ "的{di2}": 260,
262
+ "的{di4}": 261,
263
+ "的{di5}": 262,
264
+ "盛{cheng2}": 263,
265
+ "盛{sheng4}": 264,
266
+ "相{xiang1}": 265,
267
+ "相{xiang4}": 266,
268
+ "相{xiang5}": 267,
269
+ "省{sheng3}": 268,
270
+ "省{xing3}": 269,
271
+ "看{kan1}": 270,
272
+ "看{kan4}": 271,
273
+ "看{kan5}": 272,
274
+ "着{zhao1}": 273,
275
+ "着{zhao2}": 274,
276
+ "着{zhao5}": 275,
277
+ "着{zhe5}": 276,
278
+ "着{zhuo2}": 277,
279
+ "着{zhuo5}": 278,
280
+ "矫{jiao3}": 279,
281
+ "禁{jin1}": 280,
282
+ "禁{jin4}": 281,
283
+ "种{zhong3}": 282,
284
+ "种{zhong4}": 283,
285
+ "称{chen4}": 284,
286
+ "称{cheng1}": 285,
287
+ "空{kong1}": 286,
288
+ "空{kong4}": 287,
289
+ "答{da1}": 288,
290
+ "答{da2}": 289,
291
+ "粘{nian2}": 290,
292
+ "粘{zhan1}": 291,
293
+ "糊{hu2}": 292,
294
+ "糊{hu5}": 293,
295
+ "系{ji4}": 294,
296
+ "系{xi4}": 295,
297
+ "系{xi5}": 296,
298
+ "累{lei2}": 297,
299
+ "累{lei3}": 298,
300
+ "累{lei4}": 299,
301
+ "累{lei5}": 300,
302
+ "纤{qian4}": 301,
303
+ "纤{xian1}": 302,
304
+ "结{jie1}": 303,
305
+ "结{jie2}": 304,
306
+ "结{jie5}": 305,
307
+ "给{gei3}": 306,
308
+ "给{gei5}": 307,
309
+ "给{ji3}": 308,
310
+ "缝{feng2}": 309,
311
+ "缝{feng4}": 310,
312
+ "缝{feng5}": 311,
313
+ "肖{xiao1}": 312,
314
+ "肖{xiao4}": 313,
315
+ "背{bei1}": 314,
316
+ "背{bei4}": 315,
317
+ "脏{zang1}": 316,
318
+ "脏{zang4}": 317,
319
+ "舍{she3}": 318,
320
+ "舍{she4}": 319,
321
+ "色{se4}": 320,
322
+ "色{shai3}": 321,
323
+ "落{lao4}": 322,
324
+ "落{luo4}": 323,
325
+ "蒙{meng1}": 324,
326
+ "蒙{meng2}": 325,
327
+ "蒙{meng3}": 326,
328
+ "薄{bao2}": 327,
329
+ "薄{bo2}": 328,
330
+ "薄{bo4}": 329,
331
+ "藏{cang2}": 330,
332
+ "藏{zang4}": 331,
333
+ "血{xie3}": 332,
334
+ "血{xue4}": 333,
335
+ "行{hang2}": 334,
336
+ "行{hang5}": 335,
337
+ "行{heng5}": 336,
338
+ "行{xing2}": 337,
339
+ "行{xing4}": 338,
340
+ "要{yao1}": 339,
341
+ "要{yao4}": 340,
342
+ "观{guan1}": 341,
343
+ "观{guan4}": 342,
344
+ "觉{jiao4}": 343,
345
+ "觉{jiao5}": 344,
346
+ "觉{jve2}": 345,
347
+ "角{jiao3}": 346,
348
+ "角{jve2}": 347,
349
+ "解{jie3}": 348,
350
+ "解{jie4}": 349,
351
+ "解{xie4}": 350,
352
+ "说{shui4}": 351,
353
+ "说{shuo1}": 352,
354
+ "调{diao4}": 353,
355
+ "调{tiao2}": 354,
356
+ "踏{ta1}": 355,
357
+ "踏{ta4}": 356,
358
+ "车{che1}": 357,
359
+ "车{jv1}": 358,
360
+ "转{zhuan3}": 359,
361
+ "转{zhuan4}": 360,
362
+ "载{zai3}": 361,
363
+ "载{zai4}": 362,
364
+ "还{hai2}": 363,
365
+ "还{huan2}": 364,
366
+ "遂{sui2}": 365,
367
+ "遂{sui4}": 366,
368
+ "都{dou1}": 367,
369
+ "都{du1}": 368,
370
+ "重{chong2}": 369,
371
+ "重{zhong4}": 370,
372
+ "量{liang2}": 371,
373
+ "量{liang4}": 372,
374
+ "量{liang5}": 373,
375
+ "钻{zuan1}": 374,
376
+ "钻{zuan4}": 375,
377
+ "铺{pu1}": 376,
378
+ "铺{pu4}": 377,
379
+ "长{chang2}": 378,
380
+ "长{chang3}": 379,
381
+ "长{zhang3}": 380,
382
+ "间{jian1}": 381,
383
+ "间{jian4}": 382,
384
+ "降{jiang4}": 383,
385
+ "降{xiang2}": 384,
386
+ "难{nan2}": 385,
387
+ "难{nan4}": 386,
388
+ "难{nan5}": 387,
389
+ "露{lou4}": 388,
390
+ "露{lu4}": 389,
391
+ "鲜{xian1}": 390,
392
+ "鲜{xian3}": 391
393
+ }
diffrhythm/g2p/sources/g2p_chinese_model/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
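vocab.txt is the WordPiece vocabulary for the BERT encoder configured above (config.json declares vocab_size 21128, consistent with the standard Chinese BERT vocabulary); it is too large to render in this view. If needed it can be read directly, as in the rough sketch below (a plain line-per-token reading for inspection, not the project's own tokenizer setup).

```python
# Rough sketch: read the WordPiece vocab as token -> id, one token per line.
MODEL_DIR = "diffrhythm/g2p/sources/g2p_chinese_model"

with open(f"{MODEL_DIR}/vocab.txt", encoding="utf-8") as f:
    vocab = {line.rstrip("\n"): idx for idx, line in enumerate(f)}

print(len(vocab))                               # expected to match config.json's vocab_size
print(vocab.get("[CLS]"), vocab.get("[MASK]"))  # special tokens, if present
```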
 
diffrhythm/g2p/sources/pinyin_2_bpmf.txt ADDED
@@ -0,0 +1,429 @@
1
+ a ㄚ
2
+ ai ㄞ
3
+ an ㄢ
4
+ ang ㄤ
5
+ ao ㄠ
6
+ ba ㄅㄚ
7
+ bai ㄅㄞ
8
+ ban ㄅㄢ
9
+ bang ㄅㄤ
10
+ bao ㄅㄠ
11
+ bei ㄅㄟ
12
+ ben ㄅㄣ
13
+ beng ㄅㄥ
14
+ bi ㄅㄧ
15
+ bian ㄅㄧㄢ
16
+ biang ㄅㄧㄤ
17
+ biao ㄅㄧㄠ
18
+ bie ㄅㄧㄝ
19
+ bin ㄅㄧㄣ
20
+ bing ㄅㄧㄥ
21
+ bo ㄅㄛ
22
+ bu ㄅㄨ
23
+ ca ㄘㄚ
24
+ cai ㄘㄞ
25
+ can ㄘㄢ
26
+ cang ㄘㄤ
27
+ cao ㄘㄠ
28
+ ce ㄘㄜ
29
+ cen ㄘㄣ
30
+ ceng ㄘㄥ
31
+ cha ㄔㄚ
32
+ chai ㄔㄞ
33
+ chan ㄔㄢ
34
+ chang ㄔㄤ
35
+ chao ㄔㄠ
36
+ che ㄔㄜ
37
+ chen ㄔㄣ
38
+ cheng ㄔㄥ
39
+ chi ㄔ
40
+ chong ㄔㄨㄥ
41
+ chou ㄔㄡ
42
+ chu ㄔㄨ
43
+ chua ㄔㄨㄚ
44
+ chuai ㄔㄨㄞ
45
+ chuan ㄔㄨㄢ
46
+ chuang ㄔㄨㄤ
47
+ chui ㄔㄨㄟ
48
+ chun ㄔㄨㄣ
49
+ chuo ㄔㄨㄛ
50
+ ci ㄘ
51
+ cong ㄘㄨㄥ
52
+ cou ㄘㄡ
53
+ cu ㄘㄨ
54
+ cuan ㄘㄨㄢ
55
+ cui ㄘㄨㄟ
56
+ cun ㄘㄨㄣ
57
+ cuo ㄘㄨㄛ
58
+ da ㄉㄚ
59
+ dai ㄉㄞ
60
+ dan ㄉㄢ
61
+ dang ㄉㄤ
62
+ dao ㄉㄠ
63
+ de ㄉㄜ
64
+ dei ㄉㄟ
65
+ den ㄉㄣ
66
+ deng ㄉㄥ
67
+ di ㄉㄧ
68
+ dia ㄉㄧㄚ
69
+ dian ㄉㄧㄢ
70
+ diao ㄉㄧㄠ
71
+ die ㄉㄧㄝ
72
+ din ㄉㄧㄣ
73
+ ding ㄉㄧㄥ
74
+ diu ㄉㄧㄡ
75
+ dong ㄉㄨㄥ
76
+ dou ㄉㄡ
77
+ du ㄉㄨ
78
+ duan ㄉㄨㄢ
79
+ dui ㄉㄨㄟ
80
+ dun ㄉㄨㄣ
81
+ duo ㄉㄨㄛ
82
+ e ㄜ
83
+ ei ㄟ
84
+ en ㄣ
85
+ eng ㄥ
86
+ er ㄦ
87
+ fa ㄈㄚ
88
+ fan ㄈㄢ
89
+ fang ㄈㄤ
90
+ fei ㄈㄟ
91
+ fen ㄈㄣ
92
+ feng ㄈㄥ
93
+ fo ㄈㄛ
94
+ fou ㄈㄡ
95
+ fu ㄈㄨ
96
+ ga ㄍㄚ
97
+ gai ㄍㄞ
98
+ gan ㄍㄢ
99
+ gang ㄍㄤ
100
+ gao ㄍㄠ
101
+ ge ㄍㄜ
102
+ gei ㄍㄟ
103
+ gen ㄍㄣ
104
+ geng ㄍㄥ
105
+ gong ㄍㄨㄥ
106
+ gou ㄍㄡ
107
+ gu ㄍㄨ
108
+ gua ㄍㄨㄚ
109
+ guai ㄍㄨㄞ
110
+ guan ㄍㄨㄢ
111
+ guang ㄍㄨㄤ
112
+ gui ㄍㄨㄟ
113
+ gun ㄍㄨㄣ
114
+ guo ㄍㄨㄛ
115
+ ha ㄏㄚ
116
+ hai ㄏㄞ
117
+ han ㄏㄢ
118
+ hang ㄏㄤ
119
+ hao ㄏㄠ
120
+ he ㄏㄜ
121
+ hei ㄏㄟ
122
+ hen ㄏㄣ
123
+ heng ㄏㄥ
124
+ hm ㄏㄇ
125
+ hong ㄏㄨㄥ
126
+ hou ㄏㄡ
127
+ hu ㄏㄨ
128
+ hua ㄏㄨㄚ
129
+ huai ㄏㄨㄞ
130
+ huan ㄏㄨㄢ
131
+ huang ㄏㄨㄤ
132
+ hui ㄏㄨㄟ
133
+ hun ㄏㄨㄣ
134
+ huo ㄏㄨㄛ
135
+ ji ㄐㄧ
136
+ jia ㄐㄧㄚ
137
+ jian ㄐㄧㄢ
138
+ jiang ㄐㄧㄤ
139
+ jiao ㄐㄧㄠ
140
+ jie ㄐㄧㄝ
141
+ jin ㄐㄧㄣ
142
+ jing ㄐㄧㄥ
143
+ jiong ㄐㄩㄥ
144
+ jiu ㄐㄧㄡ
145
+ ju ㄐㄩ
146
+ jv ㄐㄩ
147
+ juan ㄐㄩㄢ
148
+ jvan ㄐㄩㄢ
149
+ jue ㄐㄩㄝ
150
+ jve ㄐㄩㄝ
151
+ jun ㄐㄩㄣ
152
+ ka ㄎㄚ
153
+ kai ㄎㄞ
154
+ kan ㄎㄢ
155
+ kang ㄎㄤ
156
+ kao ㄎㄠ
157
+ ke ㄎㄜ
158
+ kei ㄎㄟ
159
+ ken ㄎㄣ
160
+ keng ㄎㄥ
161
+ kong ㄎㄨㄥ
162
+ kou ㄎㄡ
163
+ ku ㄎㄨ
164
+ kua ㄎㄨㄚ
165
+ kuai ㄎㄨㄞ
166
+ kuan ㄎㄨㄢ
167
+ kuang ㄎㄨㄤ
168
+ kui ㄎㄨㄟ
169
+ kun ㄎㄨㄣ
170
+ kuo ㄎㄨㄛ
171
+ la ㄌㄚ
172
+ lai ㄌㄞ
173
+ lan ㄌㄢ
174
+ lang ㄌㄤ
175
+ lao ㄌㄠ
176
+ le ㄌㄜ
177
+ lei ㄌㄟ
178
+ leng ㄌㄥ
179
+ li ㄌㄧ
180
+ lia ㄌㄧㄚ
181
+ lian ㄌㄧㄢ
182
+ liang ㄌㄧㄤ
183
+ liao ㄌㄧㄠ
184
+ lie ㄌㄧㄝ
185
+ lin ㄌㄧㄣ
186
+ ling ㄌㄧㄥ
187
+ liu ㄌㄧㄡ
188
+ lo ㄌㄛ
189
+ long ㄌㄨㄥ
190
+ lou ㄌㄡ
191
+ lu ㄌㄨ
192
+ luan ㄌㄨㄢ
193
+ lue ㄌㄩㄝ
194
+ lun ㄌㄨㄣ
195
+ luo ㄌㄨㄛ
196
+ lv ㄌㄩ
197
+ lve ㄌㄩㄝ
198
+ m ㄇㄨ
199
+ ma ㄇㄚ
200
+ mai ㄇㄞ
201
+ man ㄇㄢ
202
+ mang ㄇㄤ
203
+ mao ㄇㄠ
204
+ me ㄇㄜ
205
+ mei ㄇㄟ
206
+ men ㄇㄣ
207
+ meng ㄇㄥ
208
+ mi ㄇㄧ
209
+ mian ㄇㄧㄢ
210
+ miao ㄇㄧㄠ
211
+ mie ㄇㄧㄝ
212
+ min ㄇㄧㄣ
213
+ ming ㄇㄧㄥ
214
+ miu ㄇㄧㄡ
215
+ mo ㄇㄛ
216
+ mou ㄇㄡ
217
+ mu ㄇㄨ
218
+ n ㄣ
219
+ na ㄋㄚ
220
+ nai ㄋㄞ
221
+ nan ㄋㄢ
222
+ nang ㄋㄤ
223
+ nao ㄋㄠ
224
+ ne ㄋㄜ
225
+ nei ㄋㄟ
226
+ nen ㄋㄣ
227
+ neng ㄋㄥ
228
+ ng ㄣ
229
+ ni ㄋㄧ
230
+ nian ㄋㄧㄢ
231
+ niang ㄋㄧㄤ
232
+ niao ㄋㄧㄠ
233
+ nie ㄋㄧㄝ
234
+ nin ㄋㄧㄣ
235
+ ning ㄋㄧㄥ
236
+ niu ㄋㄧㄡ
237
+ nong ㄋㄨㄥ
238
+ nou ㄋㄡ
239
+ nu ㄋㄨ
240
+ nuan ㄋㄨㄢ
241
+ nue ㄋㄩㄝ
242
+ nun ㄋㄨㄣ
243
+ nuo ㄋㄨㄛ
244
+ nv ㄋㄩ
245
+ nve ㄋㄩㄝ
246
+ o ㄛ
247
+ ou ㄡ
248
+ pa ㄆㄚ
249
+ pai ㄆㄞ
250
+ pan ㄆㄢ
251
+ pang ㄆㄤ
252
+ pao ㄆㄠ
253
+ pei ㄆㄟ
254
+ pen ㄆㄣ
255
+ peng ㄆㄥ
256
+ pi ㄆㄧ
257
+ pian ㄆㄧㄢ
258
+ piao ㄆㄧㄠ
259
+ pie ㄆㄧㄝ
260
+ pin ㄆㄧㄣ
261
+ ping ㄆㄧㄥ
262
+ po ㄆㄛ
263
+ pou ㄆㄡ
264
+ pu ㄆㄨ
265
+ qi ㄑㄧ
266
+ qia ㄑㄧㄚ
267
+ qian ㄑㄧㄢ
268
+ qiang ㄑㄧㄤ
269
+ qiao ㄑㄧㄠ
270
+ qie ㄑㄧㄝ
271
+ qin ㄑㄧㄣ
272
+ qing ㄑㄧㄥ
273
+ qiong ㄑㄩㄥ
274
+ qiu ㄑㄧㄡ
275
+ qu ㄑㄩ
276
+ quan ㄑㄩㄢ
277
+ qvan ㄑㄩㄢ
278
+ que ㄑㄩㄝ
279
+ qun ㄑㄩㄣ
280
+ ran ㄖㄢ
281
+ rang ㄖㄤ
282
+ rao ㄖㄠ
283
+ re ㄖㄜ
284
+ ren ㄖㄣ
285
+ reng ㄖㄥ
286
+ ri ㄖ
287
+ rong ㄖㄨㄥ
288
+ rou ㄖㄡ
289
+ ru ㄖㄨ
290
+ rua ㄖㄨㄚ
291
+ ruan ㄖㄨㄢ
292
+ rui ㄖㄨㄟ
293
+ run ㄖㄨㄣ
294
+ ruo ㄖㄨㄛ
295
+ sa ㄙㄚ
296
+ sai ㄙㄞ
297
+ san ㄙㄢ
298
+ sang ㄙㄤ
299
+ sao ㄙㄠ
300
+ se ㄙㄜ
301
+ sen ㄙㄣ
302
+ seng ㄙㄥ
303
+ sha ㄕㄚ
304
+ shai ㄕㄞ
305
+ shan ㄕㄢ
306
+ shang ㄕㄤ
307
+ shao ㄕㄠ
308
+ she ㄕㄜ
309
+ shei ㄕㄟ
310
+ shen ㄕㄣ
311
+ sheng ㄕㄥ
312
+ shi ㄕ
313
+ shou ㄕㄡ
314
+ shu ㄕㄨ
315
+ shua ㄕㄨㄚ
316
+ shuai ㄕㄨㄞ
317
+ shuan ㄕㄨㄢ
318
+ shuang ㄕㄨㄤ
319
+ shui ㄕㄨㄟ
320
+ shun ㄕㄨㄣ
321
+ shuo ㄕㄨㄛ
322
+ si ㄙ
323
+ song ㄙㄨㄥ
324
+ sou ㄙㄡ
325
+ su ㄙㄨ
326
+ suan ㄙㄨㄢ
327
+ sui ㄙㄨㄟ
328
+ sun ㄙㄨㄣ
329
+ suo ㄙㄨㄛ
330
+ ta ㄊㄚ
331
+ tai ㄊㄞ
332
+ tan ㄊㄢ
333
+ tang ㄊㄤ
334
+ tao ㄊㄠ
335
+ te ㄊㄜ
336
+ tei ㄊㄟ
337
+ teng ㄊㄥ
338
+ ti ㄊㄧ
339
+ tian ㄊㄧㄢ
340
+ tiao ㄊㄧㄠ
341
+ tie ㄊㄧㄝ
342
+ ting ㄊㄧㄥ
343
+ tong ㄊㄨㄥ
344
+ tou ㄊㄡ
345
+ tsuo ㄘㄨㄛ
346
+ tu ㄊㄨ
347
+ tuan ㄊㄨㄢ
348
+ tui ㄊㄨㄟ
349
+ tun ㄊㄨㄣ
350
+ tuo ㄊㄨㄛ
351
+ tzan ㄗㄢ
352
+ wa ㄨㄚ
353
+ wai ㄨㄞ
354
+ wan ㄨㄢ
355
+ wang ㄨㄤ
356
+ wei ㄨㄟ
357
+ wen ㄨㄣ
358
+ weng ㄨㄥ
359
+ wo ㄨㄛ
360
+ wong ㄨㄥ
361
+ wu ㄨ
362
+ xi ㄒㄧ
363
+ xia ㄒㄧㄚ
364
+ xian ㄒㄧㄢ
365
+ xiang ㄒㄧㄤ
366
+ xiao ㄒㄧㄠ
367
+ xie ㄒㄧㄝ
368
+ xin ㄒㄧㄣ
369
+ xing ㄒㄧㄥ
370
+ xiong ㄒㄩㄥ
371
+ xiu ㄒㄧㄡ
372
+ xu ㄒㄩ
373
+ xuan ㄒㄩㄢ
374
+ xue ㄒㄩㄝ
375
+ xun ㄒㄩㄣ
376
+ ya ㄧㄚ
377
+ yai ㄧㄞ
378
+ yan ㄧㄢ
379
+ yang ㄧㄤ
380
+ yao ㄧㄠ
381
+ ye ㄧㄝ
382
+ yi ㄧ
383
+ yin ㄧㄣ
384
+ ying ㄧㄥ
385
+ yo ㄧㄛ
386
+ yong ㄩㄥ
387
+ you ㄧㄡ
388
+ yu ㄩ
389
+ yuan ㄩㄢ
390
+ yue ㄩㄝ
391
+ yve ㄩㄝ
392
+ yun ㄩㄣ
393
+ za ㄗㄚ
394
+ zai ㄗㄞ
395
+ zan ㄗㄢ
396
+ zang ㄗㄤ
397
+ zao ㄗㄠ
398
+ ze ㄗㄜ
399
+ zei ㄗㄟ
400
+ zen ㄗㄣ
401
+ zeng ㄗㄥ
402
+ zha ㄓㄚ
403
+ zhai ㄓㄞ
404
+ zhan ㄓㄢ
405
+ zhang ㄓㄤ
406
+ zhao ㄓㄠ
407
+ zhe ㄓㄜ
408
+ zhei ㄓㄟ
409
+ zhen ㄓㄣ
410
+ zheng ㄓㄥ
411
+ zhi ㄓ
412
+ zhong ㄓㄨㄥ
413
+ zhou ㄓㄡ
414
+ zhu ㄓㄨ
415
+ zhua ㄓㄨㄚ
416
+ zhuai ㄓㄨㄞ
417
+ zhuan ㄓㄨㄢ
418
+ zhuang ㄓㄨㄤ
419
+ zhui ㄓㄨㄟ
420
+ zhun ㄓㄨㄣ
421
+ zhuo ㄓㄨㄛ
422
+ zi ㄗ
423
+ zong ㄗㄨㄥ
424
+ zou ㄗㄡ
425
+ zu ㄗㄨ
426
+ zuan ㄗㄨㄢ
427
+ zui ㄗㄨㄟ
428
+ zun ㄗㄨㄣ
429
+ zuo ㄗㄨㄛ
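pinyin_2_bpmf.txt maps toneless pinyin syllables to Bopomofo (zhuyin) spellings, one whitespace-separated pair per line, and includes the project's v-spellings (jv, qvan, yve, ...) as aliases. A minimal lookup sketch is below; stripping the trailing tone digit before the lookup and carrying the tone separately is an assumption about how the table is meant to be used rather than something the file states.

```python
# Minimal sketch: convert a toned pinyin syllable such as "zhong4" to Bopomofo.
# Assumption: the tone digit is stripped for the table lookup and handled separately.
SRC = "diffrhythm/g2p/sources/pinyin_2_bpmf.txt"

pinyin2bpmf = {}
with open(SRC, encoding="utf-8") as f:
    for line in f:
        pinyin, bpmf = line.split()
        pinyin2bpmf[pinyin] = bpmf

def to_bopomofo(syllable: str) -> str:
    base = syllable.rstrip("12345")  # "zhong4" -> "zhong"
    tone = syllable[len(base):]      # "4" (may be empty)
    # Appending the digit is a placeholder; proper zhuyin uses tone marks instead.
    return pinyin2bpmf[base] + tone

print(to_bopomofo("zhong4"))  # -> "ㄓㄨㄥ4"
```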
diffrhythm/g2p/utils/__pycache__/front_utils.cpython-310.pyc ADDED
Binary file (583 Bytes). View file
 
diffrhythm/g2p/utils/__pycache__/front_utils.cpython-311.pyc ADDED
Binary file (960 Bytes). View file
 
diffrhythm/g2p/utils/__pycache__/g2p.cpython-310.pyc ADDED
Binary file (2.78 kB). View file