Upload 82 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the complete change set.
- .gitattributes +1 -0
- diffrhythm/.DS_Store +0 -0
- diffrhythm/config/defaults.ini +94 -0
- diffrhythm/config/diffrhythm-1b.json +13 -0
- diffrhythm/g2p/__pycache__/g2p_generation.cpython-310.pyc +0 -0
- diffrhythm/g2p/__pycache__/g2p_generation.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/__init__.py +87 -0
- diffrhythm/g2p/g2p/__pycache__/__init__.cpython-310.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/__init__.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-310.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-310.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/english.cpython-310.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/english.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/french.cpython-310.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/french.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/german.cpython-310.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/german.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/japanese.cpython-310.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/japanese.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/korean.cpython-310.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/korean.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-310.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-310.pyc +0 -0
- diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-311.pyc +0 -0
- diffrhythm/g2p/g2p/chinese_model_g2p.py +213 -0
- diffrhythm/g2p/g2p/cleaners.py +31 -0
- diffrhythm/g2p/g2p/english.py +202 -0
- diffrhythm/g2p/g2p/french.py +149 -0
- diffrhythm/g2p/g2p/german.py +94 -0
- diffrhythm/g2p/g2p/japanese.py +816 -0
- diffrhythm/g2p/g2p/korean.py +81 -0
- diffrhythm/g2p/g2p/mandarin.py +600 -0
- diffrhythm/g2p/g2p/text_tokenizers.py +85 -0
- diffrhythm/g2p/g2p/vocab.json +372 -0
- diffrhythm/g2p/g2p_generation.py +133 -0
- diffrhythm/g2p/sources/bpmf_2_pinyin.txt +41 -0
- diffrhythm/g2p/sources/chinese_lexicon.txt +3 -0
- diffrhythm/g2p/sources/g2p_chinese_model/config.json +819 -0
- diffrhythm/g2p/sources/g2p_chinese_model/poly_bert_model.onnx +3 -0
- diffrhythm/g2p/sources/g2p_chinese_model/polychar.txt +159 -0
- diffrhythm/g2p/sources/g2p_chinese_model/polydict.json +393 -0
- diffrhythm/g2p/sources/g2p_chinese_model/polydict_r.json +393 -0
- diffrhythm/g2p/sources/g2p_chinese_model/vocab.txt +0 -0
- diffrhythm/g2p/sources/pinyin_2_bpmf.txt +429 -0
- diffrhythm/g2p/utils/__pycache__/front_utils.cpython-310.pyc +0 -0
- diffrhythm/g2p/utils/__pycache__/front_utils.cpython-311.pyc +0 -0
- diffrhythm/g2p/utils/__pycache__/g2p.cpython-310.pyc +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+diffrhythm/g2p/sources/chinese_lexicon.txt filter=lfs diff=lfs merge=lfs -text
diffrhythm/.DS_Store ADDED (binary, 6.15 kB)
diffrhythm/config/defaults.ini
ADDED
@@ -0,0 +1,94 @@
+
+[DEFAULTS]
+
+#name of the run
+exp_name = F5
+
+# the batch size
+batch_size = 8
+
+# the chunk size
+max_frames = 3000
+min_frames = 10
+
+# number of CPU workers for the DataLoader
+num_workers = 4
+
+# the random seed
+seed = 42
+
+# Batches for gradient accumulation
+accum_batches = 1
+
+# Number of steps between checkpoints
+checkpoint_every = 10000
+
+# trainer checkpoint file to restart training from
+ckpt_path = ''
+
+# model checkpoint file to start a new training run from
+pretrained_ckpt_path = ''
+
+# Checkpoint path for the pretransform model if needed
+pretransform_ckpt_path = ''
+
+# configuration model specifying model hyperparameters
+model_config = ''
+
+# configuration for datasets
+dataset_config = ''
+
+# directory to save the checkpoints in
+save_dir = ''
+
+# grad norm
+max_grad_norm = 1.0
+
+# grad accu
+grad_accumulation_steps = 1
+
+# lr
+learning_rate = 7.5e-5
+
+# epoch
+epochs = 110
+
+# warmup steps
+num_warmup_updates = 2000
+
+# save checkpoint per steps
+save_per_updates = 5000
+
+# save last checkpoint per steps
+last_per_steps = 5000
+
+prompt_path = "/mnt/sfs/music/lance/style-lance-full|/mnt/sfs/music/lance/style-lance-cnen-music-second"
+lrc_path = "/mnt/sfs/music/lance/lrc-lance-emb-full|/mnt/sfs/music/lance/lrc-lance-cnen-second"
+latent_path = "/mnt/sfs/music/lance/latent-lance|/mnt/sfs/music/lance/latent-lance-cnen-music-second-1|/mnt/sfs/music/lance/latent-lance-cnen-music-second-2"
+
+audio_drop_prob = 0.3
+cond_drop_prob = 0.0
+style_drop_prob = 0.1
+lrc_drop_prob = 0.1
+
+align_lyrics = 0
+lyrics_slice = 0
+parse_lyrics = 1
+skip_empty_lyrics = 0
+lyrics_shift = -1
+
+use_style_prompt = 1
+
+tokenizer_type = gpt2
+
+reset_lr = 0
+
+resumable_with_seed = 666
+
+downsample_rate = 2048
+
+grad_ckpt = 0
+
+dataset_path = "/mnt/sfs/music/hkchen/workspace/F5-TTS-HW/filelists/music123latent_asred_bpmstyle_cnen_pure1"
+
+pure_prob = 0.0
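The defaults above are a standard INI file with a single [DEFAULTS] section, so they can be read with Python's stock configparser. A minimal sketch, assuming only the standard library; the casts and quote/pipe handling below mirror the value formats in the file and are not part of the upload itself:

```python
import configparser

# Read the training defaults shown above. configparser returns strings,
# so numeric values must be cast explicitly.
config = configparser.ConfigParser()
config.read("diffrhythm/config/defaults.ini")
defaults = config["DEFAULTS"]

batch_size = defaults.getint("batch_size")          # 8
learning_rate = defaults.getfloat("learning_rate")  # 7.5e-5
epochs = defaults.getint("epochs")                  # 110

# Multi-path values are quoted and pipe-separated, so they need
# manual unquoting and splitting.
prompt_paths = defaults.get("prompt_path").strip('"').split("|")
print(batch_size, learning_rate, epochs, len(prompt_paths))
```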
diffrhythm/config/diffrhythm-1b.json
ADDED
@@ -0,0 +1,13 @@
+{
+    "model_type": "diffrhythm",
+    "model": {
+        "dim": 2048,
+        "depth": 16,
+        "heads": 32,
+        "ff_mult": 4,
+        "text_dim": 512,
+        "conv_layers": 4,
+        "mel_dim": 64,
+        "text_num_embeds": 363
+    }
+}
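The 1B-variant hyperparameters are plain JSON. A minimal loading sketch using only the standard library; how DiffRhythm itself consumes this dict is not shown in this diff:

```python
import json

# Load the hyperparameters listed above and pull out the transformer shape.
with open("diffrhythm/config/diffrhythm-1b.json") as f:
    cfg = json.load(f)

assert cfg["model_type"] == "diffrhythm"
model = cfg["model"]
print(model["dim"], model["depth"], model["heads"])  # 2048 16 32
```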
diffrhythm/g2p/__pycache__/g2p_generation.cpython-310.pyc ADDED (binary, 2.61 kB)
diffrhythm/g2p/__pycache__/g2p_generation.cpython-311.pyc ADDED (binary, 4.85 kB)
diffrhythm/g2p/g2p/__init__.py
ADDED
@@ -0,0 +1,87 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from diffrhythm.g2p.g2p import cleaners
+from tokenizers import Tokenizer
+from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer
+import LangSegment
+import json
+import re
+
+
+class PhonemeBpeTokenizer:
+
+    def __init__(self, vacab_path="./diffrhythm/g2p/g2p/vocab.json"):
+        self.lang2backend = {
+            "zh": "cmn",
+            "ja": "ja",
+            "en": "en-us",
+            "fr": "fr-fr",
+            "ko": "ko",
+            "de": "de",
+        }
+        self.text_tokenizers = {}
+        self.int_text_tokenizers()
+
+        with open(vacab_path, "r") as f:
+            json_data = f.read()
+        data = json.loads(json_data)
+        self.vocab = data["vocab"]
+        LangSegment.setfilters(["en", "zh", "ja", "ko", "fr", "de"])
+
+    def int_text_tokenizers(self):
+        for key, value in self.lang2backend.items():
+            self.text_tokenizers[key] = TextTokenizer(language=value)
+
+    def tokenize(self, text, sentence, language):
+
+        # 1. convert text to phoneme
+        phonemes = []
+        if language == "auto":
+            seglist = LangSegment.getTexts(text)
+            tmp_ph = []
+            for seg in seglist:
+                tmp_ph.append(
+                    self._clean_text(
+                        seg["text"], sentence, seg["lang"], ["cjekfd_cleaners"]
+                    )
+                )
+            phonemes = "|_|".join(tmp_ph)
+        else:
+            phonemes = self._clean_text(text, sentence, language, ["cjekfd_cleaners"])
+        # print('clean text: ', phonemes)
+
+        # 2. tokenize phonemes
+        phoneme_tokens = self.phoneme2token(phonemes)
+        # print('encode: ', phoneme_tokens)
+
+        # # 3. decode tokens [optional]
+        # decoded_text = self.tokenizer.decode(phoneme_tokens)
+        # print('decoded: ', decoded_text)
+
+        return phonemes, phoneme_tokens
+
+    def _clean_text(self, text, sentence, language, cleaner_names):
+        for name in cleaner_names:
+            cleaner = getattr(cleaners, name)
+            if not cleaner:
+                raise Exception("Unknown cleaner: %s" % name)
+            text = cleaner(text, sentence, language, self.text_tokenizers)
+        return text
+
+    def phoneme2token(self, phonemes):
+        tokens = []
+        if isinstance(phonemes, list):
+            for phone in phonemes:
+                phone = phone.split("\t")[0]
+                phonemes_split = phone.split("|")
+                tokens.append(
+                    [self.vocab[p] for p in phonemes_split if p in self.vocab]
+                )
+        else:
+            phonemes = phonemes.split("\t")[0]
+            phonemes_split = phonemes.split("|")
+            tokens = [self.vocab[p] for p in phonemes_split if p in self.vocab]
+        return tokens
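PhonemeBpeTokenizer is the entry point of this g2p package: it cleans text per language, phonemizes it, and maps the pipe-separated phoneme string onto ids from vocab.json. A rough usage sketch, assuming the bundled vocab.json plus the LangSegment and espeak-backed TextTokenizer dependencies are installed; the sample output is illustrative only:

```python
from diffrhythm.g2p.g2p import PhonemeBpeTokenizer

# The default vacab_path (sic) points at the vocab.json in this upload.
tokenizer = PhonemeBpeTokenizer(vacab_path="./diffrhythm/g2p/g2p/vocab.json")

# language="auto" routes each segment through LangSegment first;
# passing "en", "zh", "ja", "fr", "ko" or "de" skips detection.
phonemes, tokens = tokenizer.tokenize("hello world", sentence=None, language="en")
print(phonemes)  # pipe-separated phoneme string
print(tokens)    # ids of the phonemes found in vocab.json
```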
diffrhythm/g2p/g2p/__pycache__/__init__.cpython-310.pyc ADDED (binary, 2.62 kB)
diffrhythm/g2p/g2p/__pycache__/__init__.cpython-311.pyc ADDED (binary, 4.6 kB)
diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-310.pyc ADDED (binary, 6.87 kB)
diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-311.pyc ADDED (binary, 13.1 kB)
diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-310.pyc ADDED (binary, 950 Bytes)
diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-311.pyc ADDED (binary, 1.5 kB)
diffrhythm/g2p/g2p/__pycache__/english.cpython-310.pyc ADDED (binary, 4.93 kB)
diffrhythm/g2p/g2p/__pycache__/english.cpython-311.pyc ADDED (binary, 9.27 kB)
diffrhythm/g2p/g2p/__pycache__/french.cpython-310.pyc ADDED (binary, 3.66 kB)
diffrhythm/g2p/g2p/__pycache__/french.cpython-311.pyc ADDED (binary, 5.74 kB)
diffrhythm/g2p/g2p/__pycache__/german.cpython-310.pyc ADDED (binary, 2.45 kB)
diffrhythm/g2p/g2p/__pycache__/german.cpython-311.pyc ADDED (binary, 4.03 kB)
diffrhythm/g2p/g2p/__pycache__/japanese.cpython-310.pyc ADDED (binary, 17.7 kB)
diffrhythm/g2p/g2p/__pycache__/japanese.cpython-311.pyc ADDED (binary, 28.1 kB)
diffrhythm/g2p/g2p/__pycache__/korean.cpython-310.pyc ADDED (binary, 1.94 kB)
diffrhythm/g2p/g2p/__pycache__/korean.cpython-311.pyc ADDED (binary, 2.91 kB)
diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-310.pyc ADDED (binary, 12.6 kB)
diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-311.pyc ADDED (binary, 25.2 kB)
diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-310.pyc ADDED (binary, 2.67 kB)
diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-311.pyc ADDED (binary, 4.78 kB)
diffrhythm/g2p/g2p/chinese_model_g2p.py
ADDED
@@ -0,0 +1,213 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+import json
+from transformers import BertTokenizer
+from torch.utils.data import Dataset
+from transformers.models.bert.modeling_bert import *
+import torch
+import torch.nn.functional as F
+from onnxruntime import InferenceSession, GraphOptimizationLevel, SessionOptions
+
+
+class PolyDataset(Dataset):
+    def __init__(self, words, labels, word_pad_idx=0, label_pad_idx=-1):
+        self.dataset = self.preprocess(words, labels)
+        self.word_pad_idx = word_pad_idx
+        self.label_pad_idx = label_pad_idx
+
+    def preprocess(self, origin_sentences, origin_labels):
+        """
+        Maps tokens and tags to their indices and stores them in the dict data.
+        examples:
+            word:['[CLS]', '浙', '商', '银', '行', '企', '业', '信', '贷', '部']
+            sentence:([101, 3851, 1555, 7213, 6121, 821, 689, 928, 6587, 6956],
+                      array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]))
+            label:[3, 13, 13, 13, 0, 0, 0, 0, 0]
+        """
+        data = []
+        labels = []
+        sentences = []
+        # tokenize
+        for line in origin_sentences:
+            # replace each token by its index
+            # we can not use encode_plus because our sentences are aligned to labels in list type
+            words = []
+            word_lens = []
+            for token in line:
+                words.append(token)
+                word_lens.append(1)
+            token_start_idxs = 1 + np.cumsum([0] + word_lens[:-1])
+            sentences.append(((words, token_start_idxs), 0))
+            ###
+        for tag in origin_labels:
+            labels.append(tag)
+
+        for sentence, label in zip(sentences, labels):
+            data.append((sentence, label))
+        return data
+
+    def __getitem__(self, idx):
+        """sample data to get batch"""
+        word = self.dataset[idx][0]
+        label = self.dataset[idx][1]
+        return [word, label]
+
+    def __len__(self):
+        """get dataset size"""
+        return len(self.dataset)
+
+    def collate_fn(self, batch):
+
+        sentences = [x[0][0] for x in batch]
+        ori_sents = [x[0][1] for x in batch]
+        labels = [x[1] for x in batch]
+        batch_len = len(sentences)
+
+        # compute length of longest sentence in batch
+        max_len = max([len(s[0]) for s in sentences])
+        max_label_len = 0
+        batch_data = np.ones((batch_len, max_len))
+        batch_label_starts = []
+
+        # padding and aligning
+        for j in range(batch_len):
+            cur_len = len(sentences[j][0])
+            batch_data[j][:cur_len] = sentences[j][0]
+            label_start_idx = sentences[j][-1]
+            label_starts = np.zeros(max_len)
+            label_starts[[idx for idx in label_start_idx if idx < max_len]] = 1
+            batch_label_starts.append(label_starts)
+            max_label_len = max(int(sum(label_starts)), max_label_len)
+
+        # padding label
+        batch_labels = self.label_pad_idx * np.ones((batch_len, max_label_len))
+        batch_pmasks = self.label_pad_idx * np.ones((batch_len, max_label_len))
+        for j in range(batch_len):
+            cur_tags_len = len(labels[j])
+            batch_labels[j][:cur_tags_len] = labels[j]
+            batch_pmasks[j][:cur_tags_len] = [
+                1 if item > 0 else 0 for item in labels[j]
+            ]
+
+        # convert data to torch LongTensors
+        batch_data = torch.tensor(batch_data, dtype=torch.long)
+        batch_label_starts = torch.tensor(batch_label_starts, dtype=torch.long)
+        batch_labels = torch.tensor(batch_labels, dtype=torch.long)
+        batch_pmasks = torch.tensor(batch_pmasks, dtype=torch.long)
+        return [batch_data, batch_label_starts, batch_labels, batch_pmasks, ori_sents]
+
+
+class BertPolyPredict:
+    def __init__(self, bert_model, jsonr_file, json_file):
+        self.tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=True)
+        with open(jsonr_file, "r", encoding="utf8") as fp:
+            self.pron_dict = json.load(fp)
+        with open(json_file, "r", encoding="utf8") as fp:
+            self.pron_dict_id_2_pinyin = json.load(fp)
+        self.num_polyphone = len(self.pron_dict)
+        self.device = "cpu"
+        self.polydataset = PolyDataset
+        options = SessionOptions()  # initialize session options
+        options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
+        print(os.path.join(bert_model, "poly_bert_model.onnx"))
+        self.session = InferenceSession(
+            os.path.join(bert_model, "poly_bert_model.onnx"),
+            sess_options=options,
+            providers=[
+                "CoreMLExecutionProvider",  # Replace CUDA with CoreML
+                "CPUExecutionProvider",
+            ],  # CPUExecutionProvider #CUDAExecutionProvider
+        )
+        # self.session.set_providers(['CUDAExecutionProvider', "CPUExecutionProvider"], [ {'device_id': 0}])
+
+        # disable session.run() fallback mechanism, it prevents for a reset of the execution provider
+        self.session.disable_fallback()
+
+    def predict_process(self, txt_list):
+        word_test, label_test, texts_test = self.get_examples_po(txt_list)
+        data = self.polydataset(word_test, label_test)
+        predict_loader = DataLoader(
+            data, batch_size=1, shuffle=False, collate_fn=data.collate_fn
+        )
+        pred_tags = self.predict_onnx(predict_loader)
+        return pred_tags
+
+    def predict_onnx(self, dev_loader):
+        pred_tags = []
+        with torch.no_grad():
+            for idx, batch_samples in enumerate(dev_loader):
+                # [batch_data, batch_label_starts, batch_labels, batch_pmasks, ori_sents]
+                batch_data, batch_label_starts, batch_labels, batch_pmasks, _ = (
+                    batch_samples
+                )
+                # shift tensors to GPU if available
+                batch_data = batch_data.to(self.device)
+                batch_label_starts = batch_label_starts.to(self.device)
+                batch_labels = batch_labels.to(self.device)
+                batch_pmasks = batch_pmasks.to(self.device)
+                batch_data = np.asarray(batch_data, dtype=np.float32)
+                batch_pmasks = np.asarray(batch_pmasks, dtype=np.float32)
+                # batch_output = self.session.run(output_names=['outputs'], input_feed={"input_ids":batch_data, "input_pmasks": batch_pmasks})[0][0]
+                batch_output = self.session.run(
+                    output_names=["outputs"], input_feed={"input_ids": batch_data}
+                )[0]
+                label_masks = batch_pmasks == 1
+                batch_labels = batch_labels.to("cpu").numpy()
+                for i, indices in enumerate(np.argmax(batch_output, axis=2)):
+                    for j, idx in enumerate(indices):
+                        if label_masks[i][j]:
+                            # pred_tag.append(idx)
+                            pred_tags.append(self.pron_dict_id_2_pinyin[str(idx + 1)])
+        return pred_tags
+
+    def get_examples_po(self, text_list):
+
+        word_list = []
+        label_list = []
+        sentence_list = []
+        id = 0
+        for line in [text_list]:
+            sentence = line[0]
+            words = []
+            tokens = line[0]
+            index = line[-1]
+            front = index
+            back = len(tokens) - index - 1
+            labels = [0] * front + [1] + [0] * back
+            words = ["[CLS]"] + [item for item in sentence]
+            words = self.tokenizer.convert_tokens_to_ids(words)
+            word_list.append(words)
+            label_list.append(labels)
+            sentence_list.append(sentence)
+
+            id += 1
+            # mask_list.append(masks)
+            assert len(labels) + 1 == len(words), print(
+                (
+                    poly,
+                    sentence,
+                    words,
+                    labels,
+                    sentence,
+                    len(sentence),
+                    len(words),
+                    len(labels),
+                )
+            )
+            assert len(labels) + 1 == len(
+                words
+            ), "Number of labels does not match number of words"
+            assert len(labels) == len(
+                sentence
+            ), "Number of labels does not match number of sentences"
+            assert len(word_list) == len(
+                label_list
+            ), "Number of label sentences does not match number of word sentences"
+        return word_list, label_list, text_list
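BertPolyPredict wraps the ONNX polyphone classifier shipped under g2p_chinese_model. A hedged usage sketch: the paths follow the files listed in this upload, the input format (a list of characters plus the 0-based index of the polyphonic character) is inferred from get_examples_po() above, and the assignment of polydict_r.json / polydict.json to the two dict arguments is an assumption:

```python
from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict

model_dir = "diffrhythm/g2p/sources/g2p_chinese_model"
predictor = BertPolyPredict(
    bert_model=model_dir,
    jsonr_file=f"{model_dir}/polydict_r.json",
    json_file=f"{model_dir}/polydict.json",
)

# Disambiguate the polyphonic character at index 1 of "银行".
pred = predictor.predict_process([["银", "行"], 1])
print(pred)  # expected: a pinyin reading such as ["hang2"]
```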
diffrhythm/g2p/g2p/cleaners.py
ADDED
@@ -0,0 +1,31 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import re
+from diffrhythm.g2p.g2p.japanese import japanese_to_ipa
+from diffrhythm.g2p.g2p.mandarin import chinese_to_ipa
+from diffrhythm.g2p.g2p.english import english_to_ipa
+from diffrhythm.g2p.g2p.french import french_to_ipa
+from diffrhythm.g2p.g2p.korean import korean_to_ipa
+from diffrhythm.g2p.g2p.german import german_to_ipa
+
+
+def cjekfd_cleaners(text, sentence, language, text_tokenizers):
+
+    if language == "zh":
+        return chinese_to_ipa(text, sentence, text_tokenizers["zh"])
+    elif language == "ja":
+        return japanese_to_ipa(text, text_tokenizers["ja"])
+    elif language == "en":
+        return english_to_ipa(text, text_tokenizers["en"])
+    elif language == "fr":
+        return french_to_ipa(text, text_tokenizers["fr"])
+    elif language == "ko":
+        return korean_to_ipa(text, text_tokenizers["ko"])
+    elif language == "de":
+        return german_to_ipa(text, text_tokenizers["de"])
+    else:
+        raise Exception("Unknown language: %s" % language)
+        return None
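cjekfd_cleaners is a plain per-language dispatcher over the six *_to_ipa front ends. A direct-call sketch, assuming a working espeak backend behind TextTokenizer; only the one tokenizer entry the dispatcher will look up is built here, instead of the full dict that PhonemeBpeTokenizer constructs:

```python
from diffrhythm.g2p.g2p.cleaners import cjekfd_cleaners
from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer

# "en" maps to the "en-us" backend in PhonemeBpeTokenizer.lang2backend.
tokenizers = {"en": TextTokenizer(language="en-us")}
ipa = cjekfd_cleaners("good morning", None, "en", tokenizers)
print(ipa)  # pipe-delimited IPA phoneme string
```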
diffrhythm/g2p/g2p/english.py
ADDED
@@ -0,0 +1,202 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import re
+from unidecode import unidecode
+import inflect
+
+"""
+    Text clean time
+"""
+_inflect = inflect.engine()
+_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
+_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
+_percent_number_re = re.compile(r"([0-9\.\,]*[0-9]+%)")
+_pounds_re = re.compile(r"£([0-9\,]*[0-9]+)")
+_dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
+_fraction_re = re.compile(r"([0-9]+)/([0-9]+)")
+_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
+_number_re = re.compile(r"[0-9]+")
+
+# List of (regular expression, replacement) pairs for abbreviations:
+_abbreviations = [
+    (re.compile("\\b%s\\b" % x[0], re.IGNORECASE), x[1])
+    for x in [
+        ("mrs", "misess"),
+        ("mr", "mister"),
+        ("dr", "doctor"),
+        ("st", "saint"),
+        ("co", "company"),
+        ("jr", "junior"),
+        ("maj", "major"),
+        ("gen", "general"),
+        ("drs", "doctors"),
+        ("rev", "reverend"),
+        ("lt", "lieutenant"),
+        ("hon", "honorable"),
+        ("sgt", "sergeant"),
+        ("capt", "captain"),
+        ("esq", "esquire"),
+        ("ltd", "limited"),
+        ("col", "colonel"),
+        ("ft", "fort"),
+        ("etc", "et cetera"),
+        ("btw", "by the way"),
+    ]
+]
+
+_special_map = [
+    ("t|ɹ", "tɹ"),
+    ("d|ɹ", "dɹ"),
+    ("t|s", "ts"),
+    ("d|z", "dz"),
+    ("ɪ|ɹ", "ɪɹ"),
+    ("ɐ", "ɚ"),
+    ("ᵻ", "ɪ"),
+    ("əl", "l"),
+    ("x", "k"),
+    ("ɬ", "l"),
+    ("ʔ", "t"),
+    ("n̩", "n"),
+    ("oː|ɹ", "oːɹ"),
+]
+
+
+def expand_abbreviations(text):
+    for regex, replacement in _abbreviations:
+        text = re.sub(regex, replacement, text)
+    return text
+
+
+def _remove_commas(m):
+    return m.group(1).replace(",", "")
+
+
+def _expand_decimal_point(m):
+    return m.group(1).replace(".", " point ")
+
+
+def _expand_percent(m):
+    return m.group(1).replace("%", " percent ")
+
+
+def _expand_dollars(m):
+    match = m.group(1)
+    parts = match.split(".")
+    if len(parts) > 2:
+        return " " + match + " dollars "  # Unexpected format
+    dollars = int(parts[0]) if parts[0] else 0
+    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
+    if dollars and cents:
+        dollar_unit = "dollar" if dollars == 1 else "dollars"
+        cent_unit = "cent" if cents == 1 else "cents"
+        return " %s %s, %s %s " % (dollars, dollar_unit, cents, cent_unit)
+    elif dollars:
+        dollar_unit = "dollar" if dollars == 1 else "dollars"
+        return " %s %s " % (dollars, dollar_unit)
+    elif cents:
+        cent_unit = "cent" if cents == 1 else "cents"
+        return " %s %s " % (cents, cent_unit)
+    else:
+        return " zero dollars "
+
+
+def fraction_to_words(numerator, denominator):
+    if numerator == 1 and denominator == 2:
+        return " one half "
+    if numerator == 1 and denominator == 4:
+        return " one quarter "
+    if denominator == 2:
+        return " " + _inflect.number_to_words(numerator) + " halves "
+    if denominator == 4:
+        return " " + _inflect.number_to_words(numerator) + " quarters "
+    return (
+        " "
+        + _inflect.number_to_words(numerator)
+        + " "
+        + _inflect.ordinal(_inflect.number_to_words(denominator))
+        + " "
+    )
+
+
+def _expand_fraction(m):
+    numerator = int(m.group(1))
+    denominator = int(m.group(2))
+    return fraction_to_words(numerator, denominator)
+
+
+def _expand_ordinal(m):
+    return " " + _inflect.number_to_words(m.group(0)) + " "
+
+
+def _expand_number(m):
+    num = int(m.group(0))
+    if num > 1000 and num < 3000:
+        if num == 2000:
+            return " two thousand "
+        elif num > 2000 and num < 2010:
+            return " two thousand " + _inflect.number_to_words(num % 100) + " "
+        elif num % 100 == 0:
+            return " " + _inflect.number_to_words(num // 100) + " hundred "
+        else:
+            return (
+                " "
+                + _inflect.number_to_words(num, andword="", zero="oh", group=2).replace(
+                    ", ", " "
+                )
+                + " "
+            )
+    else:
+        return " " + _inflect.number_to_words(num, andword="") + " "
+
+
+# Normalize numbers pronunciation
+def normalize_numbers(text):
+    text = re.sub(_comma_number_re, _remove_commas, text)
+    text = re.sub(_pounds_re, r"\1 pounds", text)
+    text = re.sub(_dollars_re, _expand_dollars, text)
+    text = re.sub(_fraction_re, _expand_fraction, text)
+    text = re.sub(_decimal_number_re, _expand_decimal_point, text)
+    text = re.sub(_percent_number_re, _expand_percent, text)
+    text = re.sub(_ordinal_re, _expand_ordinal, text)
+    text = re.sub(_number_re, _expand_number, text)
+    return text
+
+
+def _english_to_ipa(text):
+    # text = unidecode(text).lower()
+    text = expand_abbreviations(text)
+    text = normalize_numbers(text)
+    return text
+
+
+# special map
+def special_map(text):
+    for regex, replacement in _special_map:
+        regex = regex.replace("|", "\|")
+        while re.search(r"(^|[_|]){}([_|]|$)".format(regex), text):
+            text = re.sub(
+                r"(^|[_|]){}([_|]|$)".format(regex), r"\1{}\2".format(replacement), text
+            )
+    # text = re.sub(r'([,.!?])', r'|\1', text)
+    return text
+
+
+# Add some special operation
+def english_to_ipa(text, text_tokenizer):
+    if type(text) == str:
+        text = _english_to_ipa(text)
+    else:
+        text = [_english_to_ipa(t) for t in text]
+    phonemes = text_tokenizer(text)
+    if phonemes[-1] in "p⁼ʰmftnlkxʃs`ɹaoəɛɪeɑʊŋiuɥwæjː":
+        phonemes += "|_"
+    if type(text) == str:
+        return special_map(phonemes)
+    else:
+        result_ph = []
+        for phone in phonemes:
+            result_ph.append(special_map(phone))
+        return result_ph
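The number-expansion pass is self-contained and easy to check in isolation. An illustrative call (the expected string follows from the rules above, shown with the extra spaces the rules insert collapsed):

```python
from diffrhythm.g2p.g2p.english import normalize_numbers

print(normalize_numbers("The $3.50 fee rose 20% in 2024."))
# Dollar amounts expand first, and the digits they leave behind are then
# spelled out by the final number pass, giving approximately:
# "The three dollars, fifty cents fee rose twenty percent in twenty twenty-four ."
```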
diffrhythm/g2p/g2p/french.py
ADDED
@@ -0,0 +1,149 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import re
+
+"""
+    Text clean time
+"""
+# List of (regular expression, replacement) pairs for abbreviations in french:
+_abbreviations = [
+    (re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1])
+    for x in [
+        ("M", "monsieur"),
+        ("Mlle", "mademoiselle"),
+        ("Mlles", "mesdemoiselles"),
+        ("Mme", "Madame"),
+        ("Mmes", "Mesdames"),
+        ("N.B", "nota bene"),
+        ("M", "monsieur"),
+        ("p.c.q", "parce que"),
+        ("Pr", "professeur"),
+        ("qqch", "quelque chose"),
+        ("rdv", "rendez-vous"),
+        ("max", "maximum"),
+        ("min", "minimum"),
+        ("no", "numéro"),
+        ("adr", "adresse"),
+        ("dr", "docteur"),
+        ("st", "saint"),
+        ("co", "companie"),
+        ("jr", "junior"),
+        ("sgt", "sergent"),
+        ("capt", "capitain"),
+        ("col", "colonel"),
+        ("av", "avenue"),
+        ("av. J.-C", "avant Jésus-Christ"),
+        ("apr. J.-C", "après Jésus-Christ"),
+        ("art", "article"),
+        ("boul", "boulevard"),
+        ("c.-à-d", "c’est-à-dire"),
+        ("etc", "et cetera"),
+        ("ex", "exemple"),
+        ("excl", "exclusivement"),
+        ("boul", "boulevard"),
+    ]
+] + [
+    (re.compile("\\b%s" % x[0]), x[1])
+    for x in [
+        ("Mlle", "mademoiselle"),
+        ("Mlles", "mesdemoiselles"),
+        ("Mme", "Madame"),
+        ("Mmes", "Mesdames"),
+    ]
+]
+
+rep_map = {
+    ":": ",",
+    ";": ",",
+    ",": ",",
+    "。": ".",
+    "!": "!",
+    "?": "?",
+    "\n": ".",
+    "·": ",",
+    "、": ",",
+    "...": ".",
+    "…": ".",
+    "$": ".",
+    "“": "",
+    "”": "",
+    "‘": "",
+    "’": "",
+    "(": "",
+    ")": "",
+    "(": "",
+    ")": "",
+    "《": "",
+    "》": "",
+    "【": "",
+    "】": "",
+    "[": "",
+    "]": "",
+    "—": "",
+    "~": "-",
+    "~": "-",
+    "「": "",
+    "」": "",
+    "¿": "",
+    "¡": "",
+}
+
+
+def collapse_whitespace(text):
+    # Regular expression matching whitespace:
+    _whitespace_re = re.compile(r"\s+")
+    return re.sub(_whitespace_re, " ", text).strip()
+
+
+def remove_punctuation_at_begin(text):
+    return re.sub(r"^[,.!?]+", "", text)
+
+
+def remove_aux_symbols(text):
+    text = re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text)
+    return text
+
+
+def replace_symbols(text):
+    text = text.replace(";", ",")
+    text = text.replace("-", " ")
+    text = text.replace(":", ",")
+    text = text.replace("&", " et ")
+    return text
+
+
+def expand_abbreviations(text):
+    for regex, replacement in _abbreviations:
+        text = re.sub(regex, replacement, text)
+    return text
+
+
+def replace_punctuation(text):
+    pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
+    replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
+    return replaced_text
+
+
+def text_normalize(text):
+    text = expand_abbreviations(text)
+    text = replace_punctuation(text)
+    text = replace_symbols(text)
+    text = remove_aux_symbols(text)
+    text = remove_punctuation_at_begin(text)
+    text = collapse_whitespace(text)
+    text = re.sub(r"([^\.,!\?\-…])$", r"\1", text)
+    return text
+
+
+def french_to_ipa(text, text_tokenizer):
+    if type(text) == str:
+        text = text_normalize(text)
+        phonemes = text_tokenizer(text)
+        return phonemes
+    else:
+        for i, t in enumerate(text):
+            text[i] = text_normalize(t)
+        return text_tokenizer(text)
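text_normalize expands the French abbreviations and rewrites punctuation before phonemization. An illustrative call (output approximate):

```python
from diffrhythm.g2p.g2p.french import text_normalize

print(text_normalize("M. Dupont & Mme Martin, rdv. demain !"))
# -> approximately "monsieur Dupont et Madame Martin, rendez vous demain !"
# Note that replace_symbols() runs after abbreviation expansion, so the
# hyphen in "rendez-vous" becomes a space.
```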
diffrhythm/g2p/g2p/german.py
ADDED
@@ -0,0 +1,94 @@
+# Copyright (c) 2024 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import re
+
+"""
+    Text clean time
+"""
+rep_map = {
+    ":": ",",
+    ";": ",",
+    ",": ",",
+    "。": ".",
+    "!": "!",
+    "?": "?",
+    "\n": ".",
+    "·": ",",
+    "、": ",",
+    "...": ".",
+    "…": ".",
+    "$": ".",
+    "“": "",
+    "”": "",
+    "‘": "",
+    "’": "",
+    "(": "",
+    ")": "",
+    "(": "",
+    ")": "",
+    "《": "",
+    "》": "",
+    "【": "",
+    "】": "",
+    "[": "",
+    "]": "",
+    "—": "",
+    "~": "-",
+    "~": "-",
+    "「": "",
+    "」": "",
+    "¿": "",
+    "¡": "",
+}
+
+
+def collapse_whitespace(text):
+    # Regular expression matching whitespace:
+    _whitespace_re = re.compile(r"\s+")
+    return re.sub(_whitespace_re, " ", text).strip()
+
+
+def remove_punctuation_at_begin(text):
+    return re.sub(r"^[,.!?]+", "", text)
+
+
+def remove_aux_symbols(text):
+    text = re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text)
+    return text
+
+
+def replace_symbols(text):
+    text = text.replace(";", ",")
+    text = text.replace("-", " ")
+    text = text.replace(":", ",")
+    return text
+
+
+def replace_punctuation(text):
+    pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
+    replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
+    return replaced_text
+
+
+def text_normalize(text):
+    text = replace_punctuation(text)
+    text = replace_symbols(text)
+    text = remove_aux_symbols(text)
+    text = remove_punctuation_at_begin(text)
+    text = collapse_whitespace(text)
+    text = re.sub(r"([^\.,!\?\-…])$", r"\1", text)
+    return text
+
+
+def german_to_ipa(text, text_tokenizer):
+    if type(text) == str:
+        text = text_normalize(text)
+        phonemes = text_tokenizer(text)
+        return phonemes
+    else:
+        for i, t in enumerate(text):
+            text[i] = text_normalize(t)
+        return text_tokenizer(text)
diffrhythm/g2p/g2p/japanese.py
ADDED
@@ -0,0 +1,816 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2024 Amphion.
|
2 |
+
#
|
3 |
+
# This source code is licensed under the MIT license found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
import io, re, os, sys, time, argparse, pdb, json
|
7 |
+
from io import StringIO
|
8 |
+
from typing import Optional
|
9 |
+
import numpy as np
|
10 |
+
import traceback
|
11 |
+
import pyopenjtalk
|
12 |
+
from pykakasi import kakasi
|
13 |
+
|
14 |
+
punctuation = [",", ".", "!", "?", ":", ";", "'", "…"]
|
15 |
+
|
16 |
+
jp_xphone2ipa = [
|
17 |
+
" a a",
|
18 |
+
" i i",
|
19 |
+
" u ɯ",
|
20 |
+
" e e",
|
21 |
+
" o o",
|
22 |
+
" a: aː",
|
23 |
+
" i: iː",
|
24 |
+
" u: ɯː",
|
25 |
+
" e: eː",
|
26 |
+
" o: oː",
|
27 |
+
" k k",
|
28 |
+
" s s",
|
29 |
+
" t t",
|
30 |
+
" n n",
|
31 |
+
" h ç",
|
32 |
+
" f ɸ",
|
33 |
+
" m m",
|
34 |
+
" y j",
|
35 |
+
" r ɾ",
|
36 |
+
" w ɰᵝ",
|
37 |
+
" N ɴ",
|
38 |
+
" g g",
|
39 |
+
" j d ʑ",
|
40 |
+
" z z",
|
41 |
+
" d d",
|
42 |
+
" b b",
|
43 |
+
" p p",
|
44 |
+
" q q",
|
45 |
+
" v v",
|
46 |
+
" : :",
|
47 |
+
" by b j",
|
48 |
+
" ch t ɕ",
|
49 |
+
" dy d e j",
|
50 |
+
" ty t e j",
|
51 |
+
" gy g j",
|
52 |
+
" gw g ɯ",
|
53 |
+
" hy ç j",
|
54 |
+
" ky k j",
|
55 |
+
" kw k ɯ",
|
56 |
+
" my m j",
|
57 |
+
" ny n j",
|
58 |
+
" py p j",
|
59 |
+
" ry ɾ j",
|
60 |
+
" sh ɕ",
|
61 |
+
" ts t s ɯ",
|
62 |
+
]
|
63 |
+
|
64 |
+
_mora_list_minimum: list[tuple[str, Optional[str], str]] = [
|
65 |
+
("ヴォ", "v", "o"),
|
66 |
+
("ヴェ", "v", "e"),
|
67 |
+
("ヴィ", "v", "i"),
|
68 |
+
("ヴァ", "v", "a"),
|
69 |
+
("ヴ", "v", "u"),
|
70 |
+
("ン", None, "N"),
|
71 |
+
("ワ", "w", "a"),
|
72 |
+
("ロ", "r", "o"),
|
73 |
+
("レ", "r", "e"),
|
74 |
+
("ル", "r", "u"),
|
75 |
+
("リョ", "ry", "o"),
|
76 |
+
("リュ", "ry", "u"),
|
77 |
+
("リャ", "ry", "a"),
|
78 |
+
("リェ", "ry", "e"),
|
79 |
+
("リ", "r", "i"),
|
80 |
+
("ラ", "r", "a"),
|
81 |
+
("ヨ", "y", "o"),
|
82 |
+
("ユ", "y", "u"),
|
83 |
+
("ヤ", "y", "a"),
|
84 |
+
("モ", "m", "o"),
|
85 |
+
("メ", "m", "e"),
|
86 |
+
("ム", "m", "u"),
|
87 |
+
("ミョ", "my", "o"),
|
88 |
+
("ミュ", "my", "u"),
|
89 |
+
("ミャ", "my", "a"),
|
90 |
+
("ミェ", "my", "e"),
|
91 |
+
("ミ", "m", "i"),
|
92 |
+
("マ", "m", "a"),
|
93 |
+
("ポ", "p", "o"),
|
94 |
+
("ボ", "b", "o"),
|
95 |
+
("ホ", "h", "o"),
|
96 |
+
("ペ", "p", "e"),
|
97 |
+
("ベ", "b", "e"),
|
98 |
+
("ヘ", "h", "e"),
|
99 |
+
("プ", "p", "u"),
|
100 |
+
("ブ", "b", "u"),
|
101 |
+
("フォ", "f", "o"),
|
102 |
+
("フェ", "f", "e"),
|
103 |
+
("フィ", "f", "i"),
|
104 |
+
("ファ", "f", "a"),
|
105 |
+
("フ", "f", "u"),
|
106 |
+
("ピョ", "py", "o"),
|
107 |
+
("ピュ", "py", "u"),
|
108 |
+
("ピャ", "py", "a"),
|
109 |
+
("ピェ", "py", "e"),
|
110 |
+
("ピ", "p", "i"),
|
111 |
+
("ビョ", "by", "o"),
|
112 |
+
("ビュ", "by", "u"),
|
113 |
+
("ビャ", "by", "a"),
|
114 |
+
("ビェ", "by", "e"),
|
115 |
+
("ビ", "b", "i"),
|
116 |
+
("ヒョ", "hy", "o"),
|
117 |
+
("ヒュ", "hy", "u"),
|
118 |
+
("ヒャ", "hy", "a"),
|
119 |
+
("ヒェ", "hy", "e"),
|
120 |
+
("ヒ", "h", "i"),
|
121 |
+
("パ", "p", "a"),
|
122 |
+
("バ", "b", "a"),
|
123 |
+
("ハ", "h", "a"),
|
124 |
+
("ノ", "n", "o"),
|
125 |
+
("ネ", "n", "e"),
|
126 |
+
("ヌ", "n", "u"),
|
127 |
+
("ニョ", "ny", "o"),
|
128 |
+
("ニュ", "ny", "u"),
|
129 |
+
("ニャ", "ny", "a"),
|
130 |
+
("ニェ", "ny", "e"),
|
131 |
+
("ニ", "n", "i"),
|
132 |
+
("ナ", "n", "a"),
|
133 |
+
("ドゥ", "d", "u"),
|
134 |
+
("ド", "d", "o"),
|
135 |
+
("トゥ", "t", "u"),
|
136 |
+
("ト", "t", "o"),
|
137 |
+
("デョ", "dy", "o"),
|
138 |
+
("デュ", "dy", "u"),
|
139 |
+
("デャ", "dy", "a"),
|
140 |
+
# ("デェ", "dy", "e"),
|
141 |
+
("ディ", "d", "i"),
|
142 |
+
("デ", "d", "e"),
|
143 |
+
("テョ", "ty", "o"),
|
144 |
+
("テュ", "ty", "u"),
|
145 |
+
("テャ", "ty", "a"),
|
146 |
+
("ティ", "t", "i"),
|
147 |
+
("テ", "t", "e"),
|
148 |
+
("ツォ", "ts", "o"),
|
149 |
+
("ツェ", "ts", "e"),
|
150 |
+
("ツィ", "ts", "i"),
|
151 |
+
("ツァ", "ts", "a"),
|
152 |
+
("ツ", "ts", "u"),
|
153 |
+
("ッ", None, "q"), # 「cl」から「q」に変更
|
154 |
+
("チョ", "ch", "o"),
|
155 |
+
("チュ", "ch", "u"),
|
156 |
+
("チャ", "ch", "a"),
|
157 |
+
("チェ", "ch", "e"),
|
158 |
+
("チ", "ch", "i"),
|
159 |
+
("ダ", "d", "a"),
|
160 |
+
("タ", "t", "a"),
|
161 |
+
("ゾ", "z", "o"),
|
162 |
+
("ソ", "s", "o"),
|
163 |
+
("ゼ", "z", "e"),
|
164 |
+
("セ", "s", "e"),
|
165 |
+
("ズィ", "z", "i"),
|
166 |
+
("ズ", "z", "u"),
|
167 |
+
("スィ", "s", "i"),
|
168 |
+
("ス", "s", "u"),
|
169 |
+
("ジョ", "j", "o"),
|
170 |
+
("ジュ", "j", "u"),
|
171 |
+
("ジャ", "j", "a"),
|
172 |
+
("ジェ", "j", "e"),
|
173 |
+
("ジ", "j", "i"),
|
174 |
+
("ショ", "sh", "o"),
|
175 |
+
("シュ", "sh", "u"),
|
176 |
+
("シャ", "sh", "a"),
|
177 |
+
("シェ", "sh", "e"),
|
178 |
+
("シ", "sh", "i"),
|
179 |
+
("ザ", "z", "a"),
|
180 |
+
("サ", "s", "a"),
|
181 |
+
("ゴ", "g", "o"),
|
182 |
+
("コ", "k", "o"),
|
183 |
+
("ゲ", "g", "e"),
|
184 |
+
("ケ", "k", "e"),
|
185 |
+
("グヮ", "gw", "a"),
|
186 |
+
("グ", "g", "u"),
|
187 |
+
("クヮ", "kw", "a"),
|
188 |
+
("ク", "k", "u"),
|
189 |
+
("ギョ", "gy", "o"),
|
190 |
+
("ギュ", "gy", "u"),
|
191 |
+
("ギャ", "gy", "a"),
|
192 |
+
("ギェ", "gy", "e"),
|
193 |
+
("ギ", "g", "i"),
|
194 |
+
("キョ", "ky", "o"),
|
195 |
+
("キュ", "ky", "u"),
|
196 |
+
("キャ", "ky", "a"),
|
197 |
+
("キェ", "ky", "e"),
|
198 |
+
("キ", "k", "i"),
|
199 |
+
("ガ", "g", "a"),
|
200 |
+
("カ", "k", "a"),
|
201 |
+
("オ", None, "o"),
|
202 |
+
("エ", None, "e"),
|
203 |
+
("ウォ", "w", "o"),
|
204 |
+
("ウェ", "w", "e"),
|
205 |
+
("ウィ", "w", "i"),
|
206 |
+
("ウ", None, "u"),
|
207 |
+
("イェ", "y", "e"),
|
208 |
+
("イ", None, "i"),
|
209 |
+
("ア", None, "a"),
|
210 |
+
]
|
211 |
+
|
212 |
+
_mora_list_additional: list[tuple[str, Optional[str], str]] = [
|
213 |
+
("ヴョ", "by", "o"),
|
214 |
+
("ヴュ", "by", "u"),
|
215 |
+
("ヴャ", "by", "a"),
|
216 |
+
("ヲ", None, "o"),
|
217 |
+
("ヱ", None, "e"),
|
218 |
+
("ヰ", None, "i"),
|
219 |
+
("ヮ", "w", "a"),
|
220 |
+
("ョ", "y", "o"),
|
221 |
+
("ュ", "y", "u"),
|
222 |
+
("ヅ", "z", "u"),
|
223 |
+
("ヂ", "j", "i"),
|
224 |
+
("ヶ", "k", "e"),
|
225 |
+
("ャ", "y", "a"),
|
226 |
+
("ォ", None, "o"),
|
227 |
+
("ェ", None, "e"),
|
228 |
+
("ゥ", None, "u"),
|
229 |
+
("ィ", None, "i"),
|
230 |
+
("ァ", None, "a"),
|
231 |
+
]
|
232 |
+
|
233 |
+
# 例: "vo" -> "ヴォ", "a" -> "ア"
|
234 |
+
mora_phonemes_to_mora_kata: dict[str, str] = {
|
235 |
+
(consonant or "") + vowel: kana for [kana, consonant, vowel] in _mora_list_minimum
|
236 |
+
}
|
237 |
+
|
238 |
+
# 例: "ヴォ" -> ("v", "o"), "ア" -> (None, "a")
|
239 |
+
mora_kata_to_mora_phonemes: dict[str, tuple[Optional[str], str]] = {
|
240 |
+
kana: (consonant, vowel)
|
241 |
+
for [kana, consonant, vowel] in _mora_list_minimum + _mora_list_additional
|
242 |
+
}
|
243 |
+
|
244 |
+
|
245 |
+
# 正規化で記号を変換するための辞書
|
246 |
+
rep_map = {
|
247 |
+
":": ":",
|
248 |
+
";": ";",
|
249 |
+
",": ",",
|
250 |
+
"。": ".",
|
251 |
+
"!": "!",
|
252 |
+
"?": "?",
|
253 |
+
"\n": ".",
|
254 |
+
".": ".",
|
255 |
+
"⋯": "…",
|
256 |
+
"···": "…",
|
257 |
+
"・・・": "…",
|
258 |
+
"·": ",",
|
259 |
+
"・": ",",
|
260 |
+
"•": ",",
|
261 |
+
"、": ",",
|
262 |
+
"$": ".",
|
263 |
+
# "“": "'",
|
264 |
+
# "”": "'",
|
265 |
+
# '"': "'",
|
266 |
+
"‘": "'",
|
267 |
+
"’": "'",
|
268 |
+
# "(": "'",
|
269 |
+
# ")": "'",
|
270 |
+
# "(": "'",
|
271 |
+
# ")": "'",
|
272 |
+
# "《": "'",
|
273 |
+
# "》": "'",
|
274 |
+
# "【": "'",
|
275 |
+
# "】": "'",
|
276 |
+
# "[": "'",
|
277 |
+
# "]": "'",
|
278 |
+
# "——": "-",
|
279 |
+
# "−": "-",
|
280 |
+
# "-": "-",
|
281 |
+
# "『": "'",
|
282 |
+
# "』": "'",
|
283 |
+
# "〈": "'",
|
284 |
+
# "〉": "'",
|
285 |
+
# "«": "'",
|
286 |
+
# "»": "'",
|
287 |
+
# # "~": "-", # これは長音記号「ー」として扱うよう変更
|
288 |
+
# # "~": "-", # これは長音記号「ー」として扱うよう変更
|
289 |
+
# "「": "'",
|
290 |
+
# "」": "'",
|
291 |
+
}
|
292 |
+
|
293 |
+
|
294 |
+
def _numeric_feature_by_regex(regex, s):
|
295 |
+
match = re.search(regex, s)
|
296 |
+
if match is None:
|
297 |
+
return -50
|
298 |
+
return int(match.group(1))
|
299 |
+
|
300 |
+
|
301 |
+
def replace_punctuation(text: str) -> str:
|
302 |
+
"""句読点等を「.」「,」「!」「?」「'」「-」に正規化し、OpenJTalkで読みが取得できるもののみ残す:
|
303 |
+
漢字・平仮名・カタカナ、アルファベット、ギリシャ文字
|
304 |
+
"""
|
305 |
+
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
306 |
+
# print("before: ", text)
|
307 |
+
# 句読点を辞書で置換
|
308 |
+
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
309 |
+
|
310 |
+
replaced_text = re.sub(
|
311 |
+
# ↓ ひらがな、カタカナ、漢字
|
312 |
+
r"[^\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u3400-\u4DBF\u3005"
|
313 |
+
# ↓ 半角アルファベット(大文字と小文字)
|
314 |
+
+ r"\u0041-\u005A\u0061-\u007A"
|
315 |
+
# ↓ 全角アルファベット(大文字と小文字)
|
316 |
+
+ r"\uFF21-\uFF3A\uFF41-\uFF5A"
|
317 |
+
# ↓ ギリシャ文字
|
318 |
+
+ r"\u0370-\u03FF\u1F00-\u1FFF"
|
319 |
+
# ↓ "!", "?", "…", ",", ".", "'", "-", 但し`…`はすでに`...`に変換されている
|
320 |
+
+ "".join(punctuation) + r"]+",
|
321 |
+
# 上述以外の文字を削除
|
322 |
+
"",
|
323 |
+
replaced_text,
|
324 |
+
)
|
325 |
+
# print("after: ", replaced_text)
|
326 |
+
return replaced_text
|
327 |
+
|
328 |
+
|
329 |
+
def fix_phone_tone(phone_tone_list: list[tuple[str, int]]) -> list[tuple[str, int]]:
|
330 |
+
"""
|
331 |
+
`phone_tone_list`のtone(アクセントの値)を0か1の範囲に修正する。
|
332 |
+
例: [(a, 0), (i, -1), (u, -1)] → [(a, 1), (i, 0), (u, 0)]
|
333 |
+
"""
|
334 |
+
tone_values = set(tone for _, tone in phone_tone_list)
|
335 |
+
if len(tone_values) == 1:
|
336 |
+
assert tone_values == {0}, tone_values
|
337 |
+
return phone_tone_list
|
338 |
+
elif len(tone_values) == 2:
|
339 |
+
if tone_values == {0, 1}:
|
340 |
+
return phone_tone_list
|
341 |
+
elif tone_values == {-1, 0}:
|
342 |
+
return [
|
343 |
+
(letter, 0 if tone == -1 else 1) for letter, tone in phone_tone_list
|
344 |
+
]
|
345 |
+
else:
|
346 |
+
raise ValueError(f"Unexpected tone values: {tone_values}")
|
347 |
+
else:
|
348 |
+
raise ValueError(f"Unexpected tone values: {tone_values}")
|
349 |
+
|
350 |
+
|
351 |
+
def fix_phone_tone_wplen(phone_tone_list, word_phone_length_list):
|
352 |
+
phones = []
|
353 |
+
tones = []
|
354 |
+
w_p_len = []
|
355 |
+
p_len = len(phone_tone_list)
|
356 |
+
idx = 0
|
357 |
+
w_idx = 0
|
358 |
+
while idx < p_len:
|
359 |
+
offset = 0
|
360 |
+
if phone_tone_list[idx] == "▁":
|
361 |
+
w_p_len.append(w_idx + 1)
|
362 |
+
|
363 |
+
curr_w_p_len = word_phone_length_list[w_idx]
|
364 |
+
for i in range(curr_w_p_len):
|
365 |
+
p, t = phone_tone_list[idx]
|
366 |
+
if p == ":" and len(phones) > 0:
|
367 |
+
if phones[-1][-1] != ":":
|
368 |
+
phones[-1] += ":"
|
369 |
+
offset -= 1
|
370 |
+
else:
|
371 |
+
phones.append(p)
|
372 |
+
tones.append(str(t))
|
373 |
+
idx += 1
|
374 |
+
if idx >= p_len:
|
375 |
+
break
|
376 |
+
w_p_len.append(curr_w_p_len + offset)
|
377 |
+
w_idx += 1
|
378 |
+
# print(w_p_len)
|
379 |
+
return phones, tones, w_p_len
|
380 |
+
|
381 |
+
|
382 |
+
def g2phone_tone_wo_punct(prosodies) -> list[tuple[str, int]]:
|
383 |
+
"""
|
384 |
+
テキストに対して、音素とアクセント(0か1)のペアのリストを返す。
|
385 |
+
ただし「!」「.」「?」等の非音素記号(punctuation)は全て消える(ポーズ記号も残さない)。
|
386 |
+
非音素記号を含める処理は`align_tones()`で行われる。
|
387 |
+
また「っ」は「cl」でなく「q」に変換される(「ん」は「N」のまま)。
|
388 |
+
例: "こんにちは、世界ー。。元気?!" →
|
389 |
+
[('k', 0), ('o', 0), ('N', 1), ('n', 1), ('i', 1), ('ch', 1), ('i', 1), ('w', 1), ('a', 1), ('s', 1), ('e', 1), ('k', 0), ('a', 0), ('i', 0), ('i', 0), ('g', 1), ('e', 1), ('N', 0), ('k', 0), ('i', 0)]
|
390 |
+
"""
|
391 |
+
result: list[tuple[str, int]] = []
|
392 |
+
current_phrase: list[tuple[str, int]] = []
|
393 |
+
current_tone = 0
|
394 |
+
last_accent = ""
|
395 |
+
for i, letter in enumerate(prosodies):
|
396 |
+
# 特殊記号の処理
|
397 |
+
|
398 |
+
# 文頭記号、無視する
|
399 |
+
if letter == "^":
|
400 |
+
assert i == 0, "Unexpected ^"
|
401 |
+
# アクセント句の終わりに来る記号
|
402 |
+
elif letter in ("$", "?", "_", "#"):
|
403 |
+
# 保持しているフレーズを、アクセント数値を0-1に修正し結果に追加
|
404 |
+
result.extend(fix_phone_tone(current_phrase))
|
405 |
+
# 末尾に来る終了記号、無視(文中の疑問文は`_`になる)
|
406 |
+
if letter in ("$", "?"):
|
407 |
+
assert i == len(prosodies) - 1, f"Unexpected {letter}"
|
408 |
+
# あとは"_"(ポーズ)と"#"(アクセント句の境界)のみ
|
409 |
+
# これらは残さず、次のアクセント句に備える。
|
410 |
+
|
411 |
+
current_phrase = []
|
412 |
+
# 0を基準点にしてそこから上昇・下降する(負の場合は上の`fix_phone_tone`で直る)
|
413 |
+
current_tone = 0
|
414 |
+
last_accent = ""
|
415 |
+
# アクセント上昇記号
|
416 |
+
elif letter == "[":
|
417 |
+
if last_accent != letter:
|
418 |
+
current_tone = current_tone + 1
|
419 |
+
last_accent = letter
|
420 |
+
# アクセント下降記号
|
421 |
+
elif letter == "]":
|
422 |
+
if last_accent != letter:
|
423 |
+
current_tone = current_tone - 1
|
424 |
+
last_accent = letter
|
425 |
+
# それ以外は通常の音素
|
426 |
+
else:
|
427 |
+
if letter == "cl": # 「っ」の処理
|
428 |
+
letter = "q"
|
429 |
+
current_phrase.append((letter, current_tone))
|
430 |
+
return result
|
431 |
+
|
432 |
+
|
433 |
+
def handle_long(sep_phonemes: list[list[str]]) -> list[list[str]]:
|
434 |
+
for i in range(len(sep_phonemes)):
|
435 |
+
if sep_phonemes[i][0] == "ー":
|
436 |
+
# sep_phonemes[i][0] = sep_phonemes[i - 1][-1]
|
437 |
+
sep_phonemes[i][0] = ":"
|
438 |
+
if "ー" in sep_phonemes[i]:
|
439 |
+
for j in range(len(sep_phonemes[i])):
|
440 |
+
if sep_phonemes[i][j] == "ー":
|
441 |
+
# sep_phonemes[i][j] = sep_phonemes[i][j - 1][-1]
|
442 |
+
sep_phonemes[i][j] = ":"
|
443 |
+
return sep_phonemes
|
444 |
+
|
445 |
+
|
446 |
+
def handle_long_word(sep_phonemes: list[list[str]]) -> list[list[str]]:
|
447 |
+
res = []
|
448 |
+
for i in range(len(sep_phonemes)):
|
449 |
+
if sep_phonemes[i][0] == "ー":
|
450 |
+
sep_phonemes[i][0] = sep_phonemes[i - 1][-1]
|
451 |
+
# sep_phonemes[i][0] = ':'
|
452 |
+
if "ー" in sep_phonemes[i]:
|
453 |
+
for j in range(len(sep_phonemes[i])):
|
454 |
+
if sep_phonemes[i][j] == "ー":
|
455 |
+
sep_phonemes[i][j] = sep_phonemes[i][j - 1][-1]
|
456 |
+
# sep_phonemes[i][j] = ':'
|
457 |
+
res.append(sep_phonemes[i])
|
458 |
+
res.append("▁")
|
459 |
+
return res


def align_tones(
    phones_with_punct: list[str], phone_tone_list: list[tuple[str, int]]
) -> list[tuple[str, int]]:
    """
    Example:
    …私は、、そう思う。
    phones_with_punct:
    [".", ".", ".", "w", "a", "t", "a", "sh", "i", "w", "a", ",", ",", "s", "o", "o", "o", "m", "o", "u", "."]
    phone_tone_list:
    [("w", 0), ("a", 0), ("t", 1), ("a", 1), ("sh", 1), ("i", 1), ("w", 1), ("a", 1), ("s", 0), ("o", 0), ("o", 1), ("o", 1), ("m", 1), ("o", 1), ("u", 0)]
    Return:
    [(".", 0), (".", 0), (".", 0), ("w", 0), ("a", 0), ("t", 1), ("a", 1), ("sh", 1), ("i", 1), ("w", 1), ("a", 1), (",", 0), (",", 0), ("s", 0), ("o", 0), ("o", 1), ("o", 1), ("m", 1), ("o", 1), ("u", 0), (".", 0)]
    """
    result: list[tuple[str, int]] = []
    tone_index = 0
    for phone in phones_with_punct:
        if tone_index >= len(phone_tone_list):
            # Leftover punctuation after all tones are consumed → append (punctuation, 0)
            result.append((phone, 0))
        elif phone == phone_tone_list[tone_index][0]:
            # Matches the current phoneme in phone_tone_list → take its tone, append (phone, tone)
            result.append((phone, phone_tone_list[tone_index][1]))
            # advance the search index
            tone_index += 1
        elif phone in punctuation or phone == "▁":
            # phone is punctuation → append (phone, 0)
            result.append((phone, 0))
        else:
            print(f"phones: {phones_with_punct}")
            print(f"phone_tone_list: {phone_tone_list}")
            print(f"result: {result}")
            print(f"tone_index: {tone_index}")
            print(f"phone: {phone}")
            raise ValueError(f"Unexpected phone: {phone}")
    return result
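
# Worked example of the alignment loop above (hand-written, assuming "…" is in the
# module-level punctuation list):
#
#     align_tones(["…", "w", "a"], [("w", 0), ("a", 1)])
#     # -> [("…", 0), ("w", 0), ("a", 1)]
#
# "…" is consumed by the punctuation branch; "w" and "a" match phone_tone_list in order.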


def kata2phoneme_list(text: str) -> list[str]:
    """
    Takes `text` that is in principle katakana and converts it, as-is, into a list of
    phoneme symbols.
    Notes:
    - If punctuation comes in (it can be a single character), return it unprocessed
      as a one-element list
    - A leading run of "ー" is left as "ー" (it is handled by `handle_long()`)
    - A sentence-internal "ー" is converted to the last phoneme of the preceding symbol.
    Examples:
    `ーーソーナノカーー` → ["ー", "ー", "s", "o", "o", "n", "a", "n", "o", "k", "a", "a", "a"]
    `?` → ["?"]
    """
    if text in punctuation:
        return [text]
    # Check that `text` consists only of katakana (including "ー")
    if re.fullmatch(r"[\u30A0-\u30FF]+", text) is None:
        raise ValueError(f"Input must be katakana only: {text}")
    sorted_keys = sorted(mora_kata_to_mora_phonemes.keys(), key=len, reverse=True)
    pattern = "|".join(map(re.escape, sorted_keys))

    def mora2phonemes(mora: str) -> str:
        cosonant, vowel = mora_kata_to_mora_phonemes[mora]
        if cosonant is None:
            return f" {vowel}"
        return f" {cosonant} {vowel}"

    spaced_phonemes = re.sub(pattern, lambda m: mora2phonemes(m.group()), text)

    # Handle the long-vowel mark "ー"
    long_pattern = r"(\w)(ー*)"
    long_replacement = lambda m: m.group(1) + (" " + m.group(1)) * len(m.group(2))
    spaced_phonemes = re.sub(long_pattern, long_replacement, spaced_phonemes)
    # spaced_phonemes += ' ▁'
    return spaced_phonemes.strip().split(" ")
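
# Worked example of the two regex passes above (illustrative): for "ソーナノカ" the
# mora substitution yields " s oー n a n o k a"; the long-vowel pass then rewrites
# "oー" as "o o", so the function returns ["s", "o", "o", "n", "a", "n", "o", "k", "a"].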


def frontend2phoneme(labels, drop_unvoiced_vowels=False):
    N = len(labels)

    phones = []
    for n in range(N):
        lab_curr = labels[n]
        # print(lab_curr)
        # current phoneme
        p3 = re.search(r"\-(.*?)\+", lab_curr).group(1)

        # deal unvoiced vowels as normal vowels
        if drop_unvoiced_vowels and p3 in "AEIOU":
            p3 = p3.lower()

        # deal with sil at the beginning and the end of text
        if p3 == "sil":
            # assert n == 0 or n == N - 1
            # if n == 0:
            #     phones.append("^")
            # elif n == N - 1:
            #     # check question form or not
            #     e3 = _numeric_feature_by_regex(r"!(\d+)_", lab_curr)
            #     if e3 == 0:
            #         phones.append("$")
            #     elif e3 == 1:
            #         phones.append("?")
            continue
        elif p3 == "pau":
            phones.append("_")
            continue
        else:
            phones.append(p3)

        # accent type and position info (forward or backward)
        a1 = _numeric_feature_by_regex(r"/A:([0-9\-]+)\+", lab_curr)
        a2 = _numeric_feature_by_regex(r"\+(\d+)\+", lab_curr)
        a3 = _numeric_feature_by_regex(r"\+(\d+)/", lab_curr)

        # number of mora in accent phrase
        f1 = _numeric_feature_by_regex(r"/F:(\d+)_", lab_curr)

        a2_next = _numeric_feature_by_regex(r"\+(\d+)\+", labels[n + 1])
        # accent phrase border
        # print(p3, a1, a2, a3, f1, a2_next, lab_curr)
        if a3 == 1 and a2_next == 1 and p3 in "aeiouAEIOUNcl":
            phones.append("#")
        # pitch falling
        elif a1 == 0 and a2_next == a2 + 1 and a2 != f1:
            phones.append("]")
        # pitch rising
        elif a2 == 1 and a2_next == 2:
            phones.append("[")

    # phones = ' '.join(phones)
    return phones
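
# Hedged note (schematic, not a full HTS label): _numeric_feature_by_regex pulls the
# accent features out of full-context label strings. On a fragment such as
# "xx-a+xx/A:-3+2+2/B:xx/F:3_xx", the regexes above would extract a1 = -3, a2 = 2,
# a3 = 2 and f1 = 3, which drive the "#", "[", "]" accent-marker insertions.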


class JapanesePhoneConverter(object):
    def __init__(self, lexicon_path=None, ipa_dict_path=None):
        # lexicon_lines = open(lexicon_path, 'r', encoding='utf-8').readlines()
        # self.lexicon = {}
        # self.single_dict = {}
        # self.double_dict = {}
        # for curr_line in lexicon_lines:
        #     k,v = curr_line.strip().split('+',1)
        #     self.lexicon[k] = v
        #     if len(k) == 2:
        #         self.double_dict[k] = v
        #     elif len(k) == 1:
        #         self.single_dict[k] = v
        self.ipa_dict = {}
        for curr_line in jp_xphone2ipa:
            k, v = curr_line.strip().split(" ", 1)
            self.ipa_dict[k] = re.sub(r"\s", "", v)
        # kakasi1 = kakasi()
        # kakasi1.setMode("H","K")
        # kakasi1.setMode("J","K")
        # kakasi1.setMode("r","Hepburn")
        self.japan_JH2K = kakasi()
        self.table = {ord(f): ord(t) for f, t in zip("67", "_¯")}

    def text2sep_kata(self, parsed) -> tuple[list[str], list[str], list]:
        """
        Takes the OpenJTalk parse of text already normalized by `text_normalize`,
        splits it into words, and returns the word list, the list of their readings
        (katakana or a single punctuation character), and the fixed-up parse.
        The word split decides, via `word2ph` in `g2p()`, how many phoneme symbols
        are assigned per character.
        Example:
        `私はそう思う!って感じ?` →
        ["私", "は", "そう", "思う", "!", "って", "感じ", "?"], ["ワタシ", "ワ", "ソー", "オモウ", "!", "ッテ", "カンジ", "?"]
        """
        # parsed: OpenJTalk analysis result
        sep_text: list[str] = []
        sep_kata: list[str] = []
        fix_parsed = []
        i = 0
        while i <= len(parsed) - 1:
            # word: the surface string of the word
            # yomi: its reading, with the devoicing sign "’" removed
            # print(parsed)
            yomi = parsed[i]["pron"]
            tmp_parsed = parsed[i]
            if i != len(parsed) - 1 and parsed[i + 1]["string"] in [
                "々",
                "ゝ",
                "ヽ",
                "ゞ",
                "ヾ",
                "゛",
            ]:
                word = parsed[i]["string"] + parsed[i + 1]["string"]
                i += 1
            else:
                word = parsed[i]["string"]
            word, yomi = replace_punctuation(word), yomi.replace("’", "")
            """
            Here `yomi` can take the following values:
            - `word` is an ordinary word → its normal katakana reading,
              possibly containing the long-vowel mark (e.g. `アー`)
            - `word` starts with `ー` → e.g. `ーラー` or `ーーー`
            - `word` is punctuation or whitespace → `、`
            - `word` is `?` → `?` (full-width)
            Unreadable characters (Cyrillic, Arabic, ...) also yield `、`, but
            normalization should prevent that, as well as the empty-`yomi` case the
            original code handled. The only case left to treat is `yomi == "、"`.
            """
            assert yomi != "", f"Empty yomi: {word}"
            if yomi == "、":
                # word has been normalized, so it should be one of
                # ".", ",", "!", "'", "-", "?", ":", ";", "…" or empty
                if word not in (
                    ".",
                    ",",
                    "!",
                    "'",
                    "-",
                    "?",
                    ":",
                    ";",
                    "…",
                    "",
                ):
                    # This happens when pyopenjtalk cannot read the characters
                    # print(
                    #     "{}Cannot read:{}, yomi:{}, new_word:{};".format(
                    #         parsed, word, yomi, self.japan_JH2K.convert(word)[0]["kana"]
                    #     )
                    # )
                    # raise ValueError(word)
                    word = self.japan_JH2K.convert(word)[0]["kana"]
                    # print(word, self.japan_JH2K.convert(word)[0]['kana'], kata2phoneme_list(self.japan_JH2K.convert(word)[0]['kana']))
                    tmp_parsed["pron"] = word
                # yomi = "-"
                # word = ','
                # keep yomi as the original symbol
                # else:
                #     parsed[i]['pron'] = parsed[i]["string"]
                yomi = word
            elif yomi == "?":
                assert word == "?", f"yomi `?` comes from: {word}"
                yomi = "?"
            if word == "":
                i += 1
                continue
            sep_text.append(word)
            sep_kata.append(yomi)
            # print(word, yomi, parts)
            fix_parsed.append(tmp_parsed)
            i += 1
        # print(sep_text, sep_kata)
        return sep_text, sep_kata, fix_parsed

    def getSentencePhone(self, sentence, blank_mode=True, phoneme_mode=False):
        # print("origin:", sentence)
        words = []
        words_phone_len = []
        short_char_flag = False
        output_duration_flag = []
        output_before_sil_flag = []
        normed_text = []
        sentence = sentence.strip().strip("'")
        sentence = re.sub(r"\s+", "", sentence)
        output_res = []
        failed_words = []
        last_long_pause = 4
        last_word = None
        frontend_text = pyopenjtalk.run_frontend(sentence)
        # print("frontend_text: ", frontend_text)
        try:
            frontend_text = pyopenjtalk.estimate_accent(frontend_text)
        except Exception:
            pass
        # print("estimate_accent: ", frontend_text)
        # sep_text: list of words
        # sep_kata: list of katakana readings, one per word
        sep_text, sep_kata, frontend_text = self.text2sep_kata(frontend_text)
        # print("sep_text: ", sep_text)
        # print("sep_kata: ", sep_kata)
        # print("frontend_text: ", frontend_text)
        # sep_phonemes: list of per-word phoneme lists
        sep_phonemes = handle_long_word([kata2phoneme_list(i) for i in sep_kata])
        # print("sep_phonemes: ", sep_phonemes)

        pron_text = [x["pron"].strip().replace("’", "") for x in frontend_text]
        # pdb.set_trace()
        prosodys = pyopenjtalk.make_label(frontend_text)
        prosodys = frontend2phoneme(prosodys, drop_unvoiced_vowels=True)
        # print("prosodys: ", ' '.join(prosodys))
        # print("pron_text: ", pron_text)
        normed_text = [x["string"].strip() for x in frontend_text]
        # list of (phoneme, accent) tuples with all punctuation removed
        phone_tone_list_wo_punct = g2phone_tone_wo_punct(prosodys)
        # print("phone_tone_list_wo_punct: ", phone_tone_list_wo_punct)

        # phone_w_punct: concatenation of sep_phonemes, with punctuation kept as-is
        phone_w_punct: list[str] = []
        w_p_len = []
        for i in sep_phonemes:
            phone_w_punct += i
            w_p_len.append(len(i))
        phone_w_punct = phone_w_punct[:-1]
        # Use the punctuation-free accent info to build accent info that includes punctuation
        # print("phone_w_punct: ", phone_w_punct)
        # print("phone_tone_list_wo_punct: ", phone_tone_list_wo_punct)
        phone_tone_list = align_tones(phone_w_punct, phone_tone_list_wo_punct)

        jp_item = {}
        jp_p = ""
        jp_t = ""
        # mye rye pye bye nye
        # je she
        # print(phone_tone_list)
        for p, t in phone_tone_list:
            if p in self.ipa_dict:
                curr_p = self.ipa_dict[p]
                jp_p += curr_p
                jp_t += str(t + 6) * len(curr_p)
            elif p in punctuation:
                jp_p += p
                jp_t += "0"
            elif p == "▁":
                jp_p += p
                jp_t += " "
            else:
                print(p, t)
                jp_p += "|"
                jp_t += "0"
        # return phones, tones, w_p_len
        jp_p = jp_p.replace("▁", " ")
        jp_t = jp_t.translate(self.table)
        jp_l = ""
        for t in jp_t:
            if t == " ":
                jp_l += " "
            else:
                jp_l += "2"
        # print(jp_p)
        # print(jp_t)
        # print(jp_l)
        # print(len(jp_p_len), sum(w_p_len), len(jp_p), sum(jp_p_len))
        assert len(jp_p) == len(jp_t) and len(jp_p) == len(jp_l)

        jp_item["jp_p"] = jp_p.replace("| |", "|").rstrip("|")
        jp_item["jp_t"] = jp_t
        jp_item["jp_l"] = jp_l
        jp_item["jp_normed_text"] = " ".join(normed_text)
        jp_item["jp_pron_text"] = " ".join(pron_text)
        # jp_item['jp_ruoma'] = sep_phonemes
        # print(len(normed_text), len(sep_phonemes))
        # print(normed_text)
        return jp_item


jpc = JapanesePhoneConverter()


def japanese_to_ipa(text, text_tokenizer):
    # phonemes = text_tokenizer(text)
    if type(text) == str:
        return jpc.getSentencePhone(text)["jp_p"]
    else:
        result_ph = []
        for t in text:
            result_ph.append(jpc.getSentencePhone(t)["jp_p"])
        return result_ph
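
# Hedged usage sketch (assumes pyopenjtalk and pykakasi are installed and that the
# module-level tables above loaded successfully; output is illustrative only):
if __name__ == "__main__":
    item = jpc.getSentencePhone("こんにちは、世界。")
    print(item["jp_p"])  # IPA string, words separated by spaces
    print(item["jp_t"])  # per-character tone digits, "6"/"7" mapped to "_"/"¯"
    print(japanese_to_ipa(["元気ですか?"], None))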
diffrhythm/g2p/g2p/korean.py
ADDED
@@ -0,0 +1,81 @@
# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import re

"""
Text clean time
"""
english_dictionary = {
    "KOREA": "코리아",
    "IDOL": "아이돌",
    "IT": "아이티",
    "IQ": "아이큐",
    "UP": "업",
    "DOWN": "다운",
    "PC": "피씨",
    "CCTV": "씨씨티비",
    "SNS": "에스엔에스",
    "AI": "에이아이",
    "CEO": "씨이오",
    "A": "에이",
    "B": "비",
    "C": "씨",
    "D": "디",
    "E": "이",
    "F": "에프",
    "G": "지",
    "H": "에이치",
    "I": "아이",
    "J": "제이",
    "K": "케이",
    "L": "엘",
    "M": "엠",
    "N": "엔",
    "O": "오",
    "P": "피",
    "Q": "큐",
    "R": "알",
    "S": "에스",
    "T": "티",
    "U": "유",
    "V": "브이",
    "W": "더블유",
    "X": "엑스",
    "Y": "와이",
    "Z": "제트",
}


def normalize(text):
    text = text.strip()
    text = re.sub(
        "[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]", "", text
    )
    text = normalize_english(text)
    text = text.lower()
    return text


def normalize_english(text):
    def fn(m):
        word = m.group()
        if word in english_dictionary:
            return english_dictionary.get(word)
        return word

    text = re.sub("([A-Za-z]+)", fn, text)
    return text


def korean_to_ipa(text, text_tokenizer):
    if type(text) == str:
        text = normalize(text)
        phonemes = text_tokenizer(text)
        return phonemes
    else:
        for i, t in enumerate(text):
            text[i] = normalize(t)
        return text_tokenizer(text)
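
# Hedged usage sketch: normalize() strips CJK ideographs, expands the English words
# and letters listed above into hangul, and lowercases the rest. korean_to_ipa()
# then defers to a phonemizer-style callable, e.g. TextTokenizer("ko") from
# text_tokenizers.py (an assumption; the caller wires this up elsewhere):
if __name__ == "__main__":
    print(normalize("CEO가 AI를 테스트한다"))  # -> "씨이오가 에이아이를 테스트한다"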
diffrhythm/g2p/g2p/mandarin.py
ADDED
@@ -0,0 +1,600 @@
# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import re
import jieba
import cn2an
from pypinyin import lazy_pinyin, BOPOMOFO
from typing import List
from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict
from diffrhythm.g2p.utils.front_utils import *
import os
from huggingface_hub import hf_hub_download

# from g2pw import G2PWConverter


# set blank level, {0:"none", 1:"char", 2:"word"}
BLANK_LEVEL = 0

# conv = G2PWConverter(style='pinyin', enable_non_tradional_chinese=True)
resource_path = r"./diffrhythm/g2p"
poly_all_class_path = os.path.join(
    resource_path, "sources", "g2p_chinese_model", "polychar.txt"
)
if not os.path.exists(poly_all_class_path):
    print(
        "Incorrect path for polyphonic character class dictionary: {}, please check...".format(
            poly_all_class_path
        )
    )
    exit()
poly_dict = generate_poly_lexicon(poly_all_class_path)

# Set up G2PW model parameters
g2pw_poly_model_path = os.path.join(resource_path, "sources", "g2p_chinese_model")
if not os.path.exists(g2pw_poly_model_path):
    print(
        "Incorrect path for g2pw polyphonic character model: {}, please check...".format(
            g2pw_poly_model_path
        )
    )
    exit()

json_file_path = os.path.join(
    resource_path, "sources", "g2p_chinese_model", "polydict.json"
)
if not os.path.exists(json_file_path):
    print(
        "Incorrect path for g2pw id to pinyin dictionary: {}, please check...".format(
            json_file_path
        )
    )
    exit()

jsonr_file_path = os.path.join(
    resource_path, "sources", "g2p_chinese_model", "polydict_r.json"
)
if not os.path.exists(jsonr_file_path):
    print(
        "Incorrect path for g2pw pinyin to id dictionary: {}, please check...".format(
            jsonr_file_path
        )
    )
    exit()

g2pw_poly_predict = BertPolyPredict(
    g2pw_poly_model_path, jsonr_file_path, json_file_path
)


"""
Text clean time
"""
# List of (Latin alphabet, bopomofo) pairs:
_latin_to_bopomofo = [
    (re.compile("%s" % x[0], re.IGNORECASE), x[1])
    for x in [
        ("a", "ㄟˉ"),
        ("b", "ㄅㄧˋ"),
        ("c", "ㄙㄧˉ"),
        ("d", "ㄉㄧˋ"),
        ("e", "ㄧˋ"),
        ("f", "ㄝˊㄈㄨˋ"),
        ("g", "ㄐㄧˋ"),
        ("h", "ㄝˇㄑㄩˋ"),
        ("i", "ㄞˋ"),
        ("j", "ㄐㄟˋ"),
        ("k", "ㄎㄟˋ"),
        ("l", "ㄝˊㄛˋ"),
        ("m", "ㄝˊㄇㄨˋ"),
        ("n", "ㄣˉ"),
        ("o", "ㄡˉ"),
        ("p", "ㄆㄧˉ"),
        ("q", "ㄎㄧㄡˉ"),
        ("r", "ㄚˋ"),
        ("s", "ㄝˊㄙˋ"),
        ("t", "ㄊㄧˋ"),
        ("u", "ㄧㄡˉ"),
        ("v", "ㄨㄧˉ"),
        ("w", "ㄉㄚˋㄅㄨˋㄌㄧㄡˋ"),
        ("x", "ㄝˉㄎㄨˋㄙˋ"),
        ("y", "ㄨㄞˋ"),
        ("z", "ㄗㄟˋ"),
    ]
]

# List of (bopomofo, ipa) pairs:
_bopomofo_to_ipa = [
    (re.compile("%s" % x[0]), x[1])
    for x in [
        ("ㄅㄛ", "p⁼wo"),
        ("ㄆㄛ", "pʰwo"),
        ("ㄇㄛ", "mwo"),
        ("ㄈㄛ", "fwo"),
        ("ㄧㄢ", "|jɛn"),
        ("ㄩㄢ", "|ɥæn"),
        ("ㄧㄣ", "|in"),
        ("ㄩㄣ", "|ɥn"),
        ("ㄧㄥ", "|iŋ"),
        ("ㄨㄥ", "|ʊŋ"),
        ("ㄩㄥ", "|jʊŋ"),
        # Add
        ("ㄧㄚ", "|ia"),
        ("ㄧㄝ", "|iɛ"),
        ("ㄧㄠ", "|iɑʊ"),
        ("ㄧㄡ", "|ioʊ"),
        ("ㄧㄤ", "|iɑŋ"),
        ("ㄨㄚ", "|ua"),
        ("ㄨㄛ", "|uo"),
        ("ㄨㄞ", "|uaɪ"),
        ("ㄨㄟ", "|ueɪ"),
        ("ㄨㄢ", "|uan"),
        ("ㄨㄣ", "|uən"),
        ("ㄨㄤ", "|uɑŋ"),
        ("ㄩㄝ", "|ɥɛ"),
        # End
        ("ㄅ", "p⁼"),
        ("ㄆ", "pʰ"),
        ("ㄇ", "m"),
        ("ㄈ", "f"),
        ("ㄉ", "t⁼"),
        ("ㄊ", "tʰ"),
        ("ㄋ", "n"),
        ("ㄌ", "l"),
        ("ㄍ", "k⁼"),
        ("ㄎ", "kʰ"),
        ("ㄏ", "x"),
        ("ㄐ", "tʃ⁼"),
        ("ㄑ", "tʃʰ"),
        ("ㄒ", "ʃ"),
        ("ㄓ", "ts`⁼"),
        ("ㄔ", "ts`ʰ"),
        ("ㄕ", "s`"),
        ("ㄖ", "ɹ`"),
        ("ㄗ", "ts⁼"),
        ("ㄘ", "tsʰ"),
        ("ㄙ", "|s"),
        ("ㄚ", "|a"),
        ("ㄛ", "|o"),
        ("ㄜ", "|ə"),
        ("ㄝ", "|ɛ"),
        ("ㄞ", "|aɪ"),
        ("ㄟ", "|eɪ"),
        ("ㄠ", "|ɑʊ"),
        ("ㄡ", "|oʊ"),
        ("ㄢ", "|an"),
        ("ㄣ", "|ən"),
        ("ㄤ", "|ɑŋ"),
        ("ㄥ", "|əŋ"),
        ("ㄦ", "əɹ"),
        ("ㄧ", "|i"),
        ("ㄨ", "|u"),
        ("ㄩ", "|ɥ"),
        ("ˉ", "→|"),
        ("ˊ", "↑|"),
        ("ˇ", "↓↑|"),
        ("ˋ", "↓|"),
        ("˙", "|"),
    ]
]
must_not_er_words = {"女儿", "老儿", "男儿", "少儿", "小儿"}


chinese_lexicon_path = hf_hub_download(
    repo_id="ASLP-lab/DiffRhythm",
    filename="diffrhythm/g2p/sources/chinese_lexicon.txt",
    repo_type="space",
)
word_pinyin_dict = {}
with open(chinese_lexicon_path, "r", encoding="utf-8") as fread:
    txt_list = fread.readlines()
    for txt in txt_list:
        word, pinyin = txt.strip().split("\t")
        word_pinyin_dict[word] = pinyin

pinyin_2_bopomofo_dict = {}
with open(
    r"./diffrhythm/g2p/sources/pinyin_2_bpmf.txt", "r", encoding="utf-8"
) as fread:
    txt_list = fread.readlines()
    for txt in txt_list:
        pinyin, bopomofo = txt.strip().split("\t")
        pinyin_2_bopomofo_dict[pinyin] = bopomofo

tone_dict = {
    "0": "˙",
    "5": "˙",
    "1": "",
    "2": "ˊ",
    "3": "ˇ",
    "4": "ˋ",
}

bopomofos2pinyin_dict = {}
with open(
    r"./diffrhythm/g2p/sources/bpmf_2_pinyin.txt", "r", encoding="utf-8"
) as fread:
    txt_list = fread.readlines()
    for txt in txt_list:
        v, k = txt.strip().split("\t")
        bopomofos2pinyin_dict[k] = v


def bpmf_to_pinyin(text):
    bopomofo_list = text.split("|")
    pinyin_list = []
    for info in bopomofo_list:
        pinyin = ""
        for c in info:
            if c in bopomofos2pinyin_dict:
                pinyin += bopomofos2pinyin_dict[c]
        if len(pinyin) == 0:
            continue
        if pinyin[-1] not in "01234":
            pinyin += "1"
        if pinyin[:-1] == "ve":
            pinyin = "y" + pinyin
        if pinyin[:-1] == "sh":
            pinyin = pinyin[:-1] + "i" + pinyin[-1]
        if pinyin == "sh":
            pinyin = pinyin[:-1] + "i"
        if pinyin[:-1] == "s":
            pinyin = "si" + pinyin[-1]
        if pinyin[:-1] == "c":
            pinyin = "ci" + pinyin[-1]
        if pinyin[:-1] == "i":
            pinyin = "yi" + pinyin[-1]
        if pinyin[:-1] == "iou":
            pinyin = "you" + pinyin[-1]
        if pinyin[:-1] == "ien":
            pinyin = "yin" + pinyin[-1]
        if "iou" in pinyin and pinyin[-4:-1] == "iou":
            pinyin = pinyin[:-4] + "iu" + pinyin[-1]
        if "uei" in pinyin:
            if pinyin[:-1] == "uei":
                pinyin = "wei" + pinyin[-1]
            elif pinyin[-4:-1] == "uei":
                pinyin = pinyin[:-4] + "ui" + pinyin[-1]
        if "uen" in pinyin and pinyin[-4:-1] == "uen":
            if pinyin[:-1] == "uen":
                pinyin = "wen" + pinyin[-1]
            elif pinyin[-4:-1] == "uei":
                pinyin = pinyin[:-4] + "un" + pinyin[-1]
        if "van" in pinyin and pinyin[-4:-1] == "van":
            if pinyin[:-1] == "van":
                pinyin = "yuan" + pinyin[-1]
            elif pinyin[-4:-1] == "van":
                pinyin = pinyin[:-4] + "uan" + pinyin[-1]
        if "ueng" in pinyin and pinyin[-5:-1] == "ueng":
            pinyin = pinyin[:-5] + "ong" + pinyin[-1]
        if pinyin[:-1] == "veng":
            pinyin = "yong" + pinyin[-1]
        if "veng" in pinyin and pinyin[-5:-1] == "veng":
            pinyin = pinyin[:-5] + "iong" + pinyin[-1]
        if pinyin[:-1] == "ieng":
            pinyin = "ying" + pinyin[-1]
        if pinyin[:-1] == "u":
            pinyin = "wu" + pinyin[-1]
        if pinyin[:-1] == "v":
            pinyin = "yv" + pinyin[-1]
        if pinyin[:-1] == "ing":
            pinyin = "ying" + pinyin[-1]
        if pinyin[:-1] == "z":
            pinyin = "zi" + pinyin[-1]
        if pinyin[:-1] == "zh":
            pinyin = "zhi" + pinyin[-1]
        if pinyin[0] == "u":
            pinyin = "w" + pinyin[1:]
        if pinyin[0] == "i":
            pinyin = "y" + pinyin[1:]
        pinyin = pinyin.replace("ien", "in")

        pinyin_list.append(pinyin)
    return " ".join(pinyin_list)


# Convert numbers to Chinese pronunciation
def number_to_chinese(text):
    # numbers = re.findall(r'\d+(?:\.?\d+)?', text)
    # for number in numbers:
    #     text = text.replace(number, cn2an.an2cn(number), 1)
    text = cn2an.transform(text, "an2cn")
    return text


def normalization(text):
    text = text.replace(",", ",")
    text = text.replace("。", ".")
    text = text.replace("!", "!")
    text = text.replace("?", "?")
    text = text.replace(";", ";")
    text = text.replace(":", ":")
    text = text.replace("、", ",")
    text = text.replace("‘", "'")
    text = text.replace("’", "'")
    text = text.replace("⋯", "…")
    text = text.replace("···", "…")
    text = text.replace("・・・", "…")
    text = text.replace("...", "…")
    text = re.sub(r"\s+", "", text)
    text = re.sub(r"[^\u4e00-\u9fff\s_,\.\?!;:\'…]", "", text)
    text = re.sub(r"\s*([,\.\?!;:\'…])\s*", r"\1", text)
    return text


def change_tone(bopomofo: str, tone: str) -> str:
    if bopomofo[-1] not in "˙ˊˇˋ":
        bopomofo = bopomofo + tone
    else:
        bopomofo = bopomofo[:-1] + tone
    return bopomofo


def er_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    if len(word) > 1 and word[-1] == "儿" and word not in must_not_er_words:
        bopomofos[-1] = change_tone(bopomofos[-1], "˙")
    return bopomofos


def bu_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    valid_char = set(word)
    if len(valid_char) == 1 and "不" in valid_char:
        pass
    elif word in ["不字"]:
        pass
    elif len(word) == 3 and word[1] == "不" and bopomofos[1][:-1] == "ㄅㄨ":
        bopomofos[1] = bopomofos[1][:-1] + "˙"
    else:
        for i, char in enumerate(word):
            if (
                i + 1 < len(bopomofos)
                and char == "不"
                and i + 1 < len(word)
                and 0 < len(bopomofos[i + 1])
                and bopomofos[i + 1][-1] == "ˋ"
            ):
                bopomofos[i] = bopomofos[i][:-1] + "ˊ"
    return bopomofos


def yi_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    punc = ":,;。?!“”‘’':,;.?!()(){}【】[]-~`、 "
    if word.find("一") != -1 and any(
        [item.isnumeric() for item in word if item != "一"]
    ):
        for i in range(len(word)):
            if (
                i == 0
                and word[0] == "一"
                and len(word) > 1
                and word[1]
                not in ["零", "一", "二", "三", "四", "五", "六", "七", "八", "九", "十"]
            ):
                if len(bopomofos[0]) > 0 and bopomofos[1][-1] in ["ˋ", "˙"]:
                    bopomofos[0] = change_tone(bopomofos[0], "ˊ")
                else:
                    bopomofos[0] = change_tone(bopomofos[0], "ˋ")
            elif word[i] == "一":
                bopomofos[i] = change_tone(bopomofos[i], "")
        return bopomofos
    elif len(word) == 3 and word[1] == "一" and word[0] == word[-1]:
        bopomofos[1] = change_tone(bopomofos[1], "˙")
    elif word.startswith("第一"):
        bopomofos[1] = change_tone(bopomofos[1], "")
    elif word.startswith("一月") or word.startswith("一日") or word.startswith("一号"):
        bopomofos[0] = change_tone(bopomofos[0], "")
    else:
        for i, char in enumerate(word):
            if char == "一" and i + 1 < len(word):
                if (
                    len(bopomofos) > i + 1
                    and len(bopomofos[i + 1]) > 0
                    and bopomofos[i + 1][-1] in {"ˋ"}
                ):
                    bopomofos[i] = change_tone(bopomofos[i], "ˊ")
                else:
                    if word[i + 1] not in punc:
                        bopomofos[i] = change_tone(bopomofos[i], "ˋ")
                    else:
                        pass
    return bopomofos
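
# Illustrative sandhi checks (the bopomofo inputs are hand-written assumptions):
#
#     bu_sandhi("不是", ["ㄅㄨˋ", "ㄕˋ"])   # -> ["ㄅㄨˊ", "ㄕˋ"]   "不" → 2nd tone before a 4th tone
#     yi_sandhi("一样", ["ㄧ", "ㄧㄤˋ"])    # -> ["ㄧˊ", "ㄧㄤˋ"]   "一" → 2nd tone before a 4th tone
#     er_sandhi("花儿", ["ㄏㄨㄚ", "ㄦˊ"])  # -> ["ㄏㄨㄚ", "ㄦ˙"]  erhua "儿" → neutral tone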


def merge_bu(seg: List) -> List:
    new_seg = []
    last_word = ""
    for word in seg:
        if word != "不":
            if last_word == "不":
                word = last_word + word
            new_seg.append(word)
        last_word = word
    return new_seg


def merge_er(seg: List) -> List:
    new_seg = []
    for i, word in enumerate(seg):
        if i - 1 >= 0 and word == "儿":
            new_seg[-1] = new_seg[-1] + seg[i]
        else:
            new_seg.append(word)
    return new_seg


def merge_yi(seg: List) -> List:
    new_seg = []
    # function 1
    for i, word in enumerate(seg):
        if (
            i - 1 >= 0
            and word == "一"
            and i + 1 < len(seg)
            and seg[i - 1] == seg[i + 1]
        ):
            if i - 1 < len(new_seg):
                new_seg[i - 1] = new_seg[i - 1] + "一" + new_seg[i - 1]
            else:
                new_seg.append(word)
                new_seg.append(seg[i + 1])
        else:
            if i - 2 >= 0 and seg[i - 1] == "一" and seg[i - 2] == word:
                continue
            else:
                new_seg.append(word)
    seg = new_seg
    new_seg = []
    isnumeric_flag = False
    for i, word in enumerate(seg):
        if all([item.isnumeric() for item in word]) and not isnumeric_flag:
            isnumeric_flag = True
            new_seg.append(word)
        else:
            new_seg.append(word)
    seg = new_seg
    new_seg = []
    # function 2
    for i, word in enumerate(seg):
        if new_seg and new_seg[-1] == "一":
            new_seg[-1] = new_seg[-1] + word
        else:
            new_seg.append(word)
    return new_seg


# Word Segmentation, and convert Chinese pronunciation to pinyin (bopomofo)
def chinese_to_bopomofo(text_short, sentence):
    # bopomofos = conv(text_short)
    words = jieba.lcut(text_short, cut_all=False)
    words = merge_yi(words)
    words = merge_bu(words)
    words = merge_er(words)
    text = ""

    char_index = 0
    for word in words:
        bopomofos = []
        if word in word_pinyin_dict and word not in poly_dict:
            pinyin = word_pinyin_dict[word]
            for py in pinyin.split(" "):
                if py[:-1] in pinyin_2_bopomofo_dict and py[-1] in tone_dict:
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                else:
                    bopomofos_lazy = lazy_pinyin(word, BOPOMOFO)
                    bopomofos += bopomofos_lazy
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
        else:
            for i in range(len(word)):
                c = word[i]
                if c in poly_dict:
                    poly_pinyin = g2pw_poly_predict.predict_process(
                        [text_short, char_index + i]
                    )[0]
                    py = poly_pinyin[2:-1]
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                elif c in word_pinyin_dict:
                    py = word_pinyin_dict[c]
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                else:
                    bopomofos.append(c)
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
        if BLANK_LEVEL == 2:
            bopomofos.append("_")
        char_index += len(word)

        if (
            len(word) == 3
            and bopomofos[0][-1] == "ˇ"
            and bopomofos[1][-1] == "ˇ"
            and bopomofos[-1][-1] == "ˇ"
        ):
            bopomofos[0] = bopomofos[0] + "ˊ"
            bopomofos[1] = bopomofos[1] + "ˊ"
        if len(word) == 2 and bopomofos[0][-1] == "ˇ" and bopomofos[-1][-1] == "ˇ":
            bopomofos[0] = bopomofos[0][:-1] + "ˊ"
        bopomofos = bu_sandhi(word, bopomofos)
        bopomofos = yi_sandhi(word, bopomofos)
        bopomofos = er_sandhi(word, bopomofos)
        if not re.search("[\u4e00-\u9fff]", word):
            text += "|" + word
            continue
        for i in range(len(bopomofos)):
            bopomofos[i] = re.sub(r"([\u3105-\u3129])$", r"\1ˉ", bopomofos[i])
        if text != "":
            text += "|"
        text += "|".join(bopomofos)
    return text


# Convert latin pronunciation to pinyin (bopomofo)
def latin_to_bopomofo(text):
    for regex, replacement in _latin_to_bopomofo:
        text = re.sub(regex, replacement, text)
    return text


# Convert pinyin (bopomofo) to IPA
def bopomofo_to_ipa(text):
    for regex, replacement in _bopomofo_to_ipa:
        text = re.sub(regex, replacement, text)
    return text


def _chinese_to_ipa(text, sentence):
    text = number_to_chinese(text.strip())
    text = normalization(text)
    text = chinese_to_bopomofo(text, sentence)
    # pinyin = bpmf_to_pinyin(text)
    text = latin_to_bopomofo(text)
    text = bopomofo_to_ipa(text)
    text = re.sub("([sɹ]`[⁼ʰ]?)([→↓↑ ]+|$)", r"\1ɹ\2", text)
    text = re.sub("([s][⁼ʰ]?)([→↓↑ ]+|$)", r"\1ɹ\2", text)
    text = re.sub(r"^\||[^\w\s_,\.\?!;:\'…\|→↓↑⁼ʰ`]", "", text)
    text = re.sub(r"([,\.\?!;:\'…])", r"|\1|", text)
    text = re.sub(r"\|+", "|", text)
    text = text.rstrip("|")
    return text


# Convert Chinese to IPA
def chinese_to_ipa(text, sentence, text_tokenizer):
    # phonemes = text_tokenizer(text.strip())
    if type(text) == str:
        return _chinese_to_ipa(text, sentence)
    else:
        result_ph = []
        for t in text:
            result_ph.append(_chinese_to_ipa(t, sentence))
        return result_ph
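
# Hedged end-to-end sketch (requires jieba/cn2an/pypinyin plus the lexicon and
# polyphone-model files resolved at import time; the output is the "|"-separated
# IPA-with-tone-arrows format, shown here only indicatively):
if __name__ == "__main__":
    print(_chinese_to_ipa("你好,世界!", "你好,世界!"))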
diffrhythm/g2p/g2p/text_tokenizers.py
ADDED
@@ -0,0 +1,85 @@
# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import re
import os
from typing import List, Pattern, Union
from phonemizer.utils import list2str, str2list
from phonemizer.backend import EspeakBackend
from phonemizer.backend.espeak.language_switch import LanguageSwitch
from phonemizer.backend.espeak.words_mismatch import WordMismatch
from phonemizer.punctuation import Punctuation
# from zer.separator import Separator
from phonemizer.separator import Separator


class TextTokenizer:
    """Phonemize Text."""

    def __init__(
        self,
        language="en-us",
        backend="espeak",
        separator=Separator(word="|_|", syllable="-", phone="|"),
        preserve_punctuation=True,
        with_stress: bool = False,
        tie: Union[bool, str] = False,
        language_switch: LanguageSwitch = "remove-flags",
        words_mismatch: WordMismatch = "ignore",
    ) -> None:
        self.preserve_punctuation_marks = ",.?!;:'…"
        self.backend = EspeakBackend(
            language,
            punctuation_marks=self.preserve_punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            with_stress=with_stress,
            tie=tie,
            language_switch=language_switch,
            words_mismatch=words_mismatch,
        )

        self.separator = separator

    # convert chinese punctuation to english punctuation
    def convert_chinese_punctuation(self, text: str) -> str:
        text = text.replace(",", ",")
        text = text.replace("。", ".")
        text = text.replace("!", "!")
        text = text.replace("?", "?")
        text = text.replace(";", ";")
        text = text.replace(":", ":")
        text = text.replace("、", ",")
        text = text.replace("‘", "'")
        text = text.replace("’", "'")
        text = text.replace("⋯", "…")
        text = text.replace("···", "…")
        text = text.replace("・・・", "…")
        text = text.replace("...", "…")
        return text

    def __call__(self, text, strip=True) -> List[str]:

        text_type = type(text)
        normalized_text = []
        for line in str2list(text):
            line = self.convert_chinese_punctuation(line.strip())
            line = re.sub(r"[^\w\s_,\.\?!;:\'…]", "", line)
            line = re.sub(r"\s*([,\.\?!;:\'…])\s*", r"\1", line)
            line = re.sub(r"\s+", " ", line)
            normalized_text.append(line)
        # print("Normalized text: ", normalized_text[0])
        phonemized = self.backend.phonemize(
            normalized_text, separator=self.separator, strip=strip, njobs=1
        )
        if text_type == str:
            phonemized = re.sub(r"([,\.\?!;:\'…])", r"|\1|", list2str(phonemized))
            phonemized = re.sub(r"\|+", "|", phonemized)
            phonemized = phonemized.rstrip("|")
        else:
            for i in range(len(phonemized)):
                phonemized[i] = re.sub(r"([,\.\?!;:\'…])", r"|\1|", phonemized[i])
                phonemized[i] = re.sub(r"\|+", "|", phonemized[i])
                phonemized[i] = phonemized[i].rstrip("|")
        return phonemized
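
# Hedged usage sketch (requires the espeak-ng backend that phonemizer wraps):
if __name__ == "__main__":
    tokenizer = TextTokenizer(language="en-us")
    # Words come back joined by the "|_|" word separator with "|" between phones.
    print(tokenizer("hello world, this is a test."))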
diffrhythm/g2p/g2p/vocab.json
ADDED
@@ -0,0 +1,372 @@
{
    "vocab": {
        ",": 0,
        ".": 1,
        "?": 2,
        "!": 3,
        "_": 4,
        "iː": 5,
        "ɪ": 6,
        "ɜː": 7,
        "ɚ": 8,
        "oːɹ": 9,
        "ɔː": 10,
        "ɔːɹ": 11,
        "ɑː": 12,
        "uː": 13,
        "ʊ": 14,
        "ɑːɹ": 15,
        "ʌ": 16,
        "ɛ": 17,
        "æ": 18,
        "eɪ": 19,
        "aɪ": 20,
        "ɔɪ": 21,
        "aʊ": 22,
        "oʊ": 23,
        "ɪɹ": 24,
        "ɛɹ": 25,
        "ʊɹ": 26,
        "p": 27,
        "b": 28,
        "t": 29,
        "d": 30,
        "k": 31,
        "ɡ": 32,
        "f": 33,
        "v": 34,
        "θ": 35,
        "ð": 36,
        "s": 37,
        "z": 38,
        "ʃ": 39,
        "ʒ": 40,
        "h": 41,
        "tʃ": 42,
        "dʒ": 43,
        "m": 44,
        "n": 45,
        "ŋ": 46,
        "j": 47,
        "w": 48,
        "ɹ": 49,
        "l": 50,
        "tɹ": 51,
        "dɹ": 52,
        "ts": 53,
        "dz": 54,
        "i": 55,
        "ɔ": 56,
        "ə": 57,
        "ɾ": 58,
        "iə": 59,
        "r": 60,
        "u": 61,
        "oː": 62,
        "ɛː": 63,
        "ɪː": 64,
        "aɪə": 65,
        "aɪɚ": 66,
        "ɑ̃": 67,
        "ç": 68,
        "ɔ̃": 69,
        "ææ": 70,
        "ɐɐ": 71,
        "ɡʲ": 72,
        "nʲ": 73,
        "iːː": 74,

        "p⁼": 75,
        "pʰ": 76,
        "t⁼": 77,
        "tʰ": 78,
        "k⁼": 79,
        "kʰ": 80,
        "x": 81,
        "tʃ⁼": 82,
        "tʃʰ": 83,
        "ts`⁼": 84,
        "ts`ʰ": 85,
        "s`": 86,
        "ɹ`": 87,
        "ts⁼": 88,
        "tsʰ": 89,
        "p⁼wo": 90,
        "p⁼wo→": 91,
        "p⁼wo↑": 92,
        "p⁼wo↓↑": 93,
        "p⁼wo↓": 94,
        "pʰwo": 95,
        "pʰwo→": 96,
        "pʰwo↑": 97,
        "pʰwo↓↑": 98,
        "pʰwo↓": 99,
        "mwo": 100,
        "mwo→": 101,
        "mwo↑": 102,
        "mwo↓↑": 103,
        "mwo↓": 104,
        "fwo": 105,
        "fwo→": 106,
        "fwo↑": 107,
        "fwo↓↑": 108,
        "fwo↓": 109,
        "jɛn": 110,
        "jɛn→": 111,
        "jɛn↑": 112,
        "jɛn↓↑": 113,
        "jɛn↓": 114,
        "ɥæn": 115,
        "ɥæn→": 116,
        "ɥæn↑": 117,
        "ɥæn↓↑": 118,
        "ɥæn↓": 119,
        "in": 120,
        "in→": 121,
        "in↑": 122,
        "in↓↑": 123,
        "in↓": 124,
        "ɥn": 125,
        "ɥn→": 126,
        "ɥn↑": 127,
        "ɥn↓↑": 128,
        "ɥn↓": 129,
        "iŋ": 130,
        "iŋ→": 131,
        "iŋ↑": 132,
        "iŋ↓↑": 133,
        "iŋ↓": 134,
        "ʊŋ": 135,
        "ʊŋ→": 136,
        "ʊŋ↑": 137,
        "ʊŋ↓↑": 138,
        "ʊŋ↓": 139,
        "jʊŋ": 140,
        "jʊŋ→": 141,
        "jʊŋ↑": 142,
        "jʊŋ↓↑": 143,
        "jʊŋ↓": 144,
        "ia": 145,
        "ia→": 146,
        "ia↑": 147,
        "ia↓↑": 148,
        "ia↓": 149,
        "iɛ": 150,
        "iɛ→": 151,
        "iɛ↑": 152,
        "iɛ↓↑": 153,
        "iɛ↓": 154,
        "iɑʊ": 155,
        "iɑʊ→": 156,
        "iɑʊ↑": 157,
        "iɑʊ↓↑": 158,
        "iɑʊ↓": 159,
        "ioʊ": 160,
        "ioʊ→": 161,
        "ioʊ↑": 162,
        "ioʊ↓↑": 163,
        "ioʊ↓": 164,
        "iɑŋ": 165,
        "iɑŋ→": 166,
        "iɑŋ↑": 167,
        "iɑŋ↓↑": 168,
        "iɑŋ↓": 169,
        "ua": 170,
        "ua→": 171,
        "ua↑": 172,
        "ua↓↑": 173,
        "ua↓": 174,
        "uo": 175,
        "uo→": 176,
        "uo↑": 177,
        "uo↓↑": 178,
        "uo↓": 179,
        "uaɪ": 180,
        "uaɪ→": 181,
        "uaɪ↑": 182,
        "uaɪ↓↑": 183,
        "uaɪ↓": 184,
        "ueɪ": 185,
        "ueɪ→": 186,
        "ueɪ↑": 187,
        "ueɪ↓↑": 188,
        "ueɪ↓": 189,
        "uan": 190,
        "uan→": 191,
        "uan↑": 192,
        "uan↓↑": 193,
        "uan↓": 194,
        "uən": 195,
        "uən→": 196,
        "uən↑": 197,
        "uən↓↑": 198,
        "uən↓": 199,
        "uɑŋ": 200,
        "uɑŋ→": 201,
        "uɑŋ↑": 202,
        "uɑŋ↓↑": 203,
        "uɑŋ↓": 204,
        "ɥɛ": 205,
        "ɥɛ→": 206,
        "ɥɛ↑": 207,
        "ɥɛ↓↑": 208,
        "ɥɛ↓": 209,
        "a": 210,
        "a→": 211,
        "a↑": 212,
        "a↓↑": 213,
        "a↓": 214,
        "o": 215,
        "o→": 216,
        "o↑": 217,
        "o↓↑": 218,
        "o↓": 219,
        "ə→": 220,
        "ə↑": 221,
        "ə↓↑": 222,
        "ə↓": 223,
        "ɛ→": 224,
        "ɛ↑": 225,
        "ɛ↓↑": 226,
        "ɛ↓": 227,
        "aɪ→": 228,
        "aɪ↑": 229,
        "aɪ↓↑": 230,
        "aɪ↓": 231,
        "eɪ→": 232,
        "eɪ↑": 233,
        "eɪ↓↑": 234,
        "eɪ↓": 235,
        "ɑʊ": 236,
        "ɑʊ→": 237,
        "ɑʊ↑": 238,
        "ɑʊ↓↑": 239,
        "ɑʊ↓": 240,
        "oʊ→": 241,
        "oʊ↑": 242,
        "oʊ↓↑": 243,
        "oʊ↓": 244,
        "an": 245,
        "an→": 246,
        "an↑": 247,
        "an↓↑": 248,
        "an↓": 249,
        "ən": 250,
        "ən→": 251,
        "ən↑": 252,
        "ən↓↑": 253,
        "ən↓": 254,
        "ɑŋ": 255,
        "ɑŋ→": 256,
        "ɑŋ↑": 257,
        "ɑŋ↓↑": 258,
        "ɑŋ↓": 259,
        "əŋ": 260,
        "əŋ→": 261,
        "əŋ↑": 262,
        "əŋ↓↑": 263,
        "əŋ↓": 264,
        "əɹ": 265,
        "əɹ→": 266,
        "əɹ↑": 267,
        "əɹ↓↑": 268,
        "əɹ↓": 269,
        "i→": 270,
        "i↑": 271,
        "i↓↑": 272,
        "i↓": 273,
        "u→": 274,
        "u↑": 275,
        "u↓↑": 276,
        "u↓": 277,
        "ɥ": 278,
        "ɥ→": 279,
        "ɥ↑": 280,
        "ɥ↓↑": 281,
        "ɥ↓": 282,
        "ts`⁼ɹ": 283,
        "ts`⁼ɹ→": 284,
        "ts`⁼ɹ↑": 285,
        "ts`⁼ɹ↓↑": 286,
        "ts`⁼ɹ↓": 287,
        "ts`ʰɹ": 288,
        "ts`ʰɹ→": 289,
        "ts`ʰɹ↑": 290,
        "ts`ʰɹ↓↑": 291,
        "ts`ʰɹ↓": 292,
        "s`ɹ": 293,
        "s`ɹ→": 294,
        "s`ɹ↑": 295,
        "s`ɹ↓↑": 296,
        "s`ɹ↓": 297,
        "ɹ`ɹ": 298,
        "ɹ`ɹ→": 299,
        "ɹ`ɹ↑": 300,
        "ɹ`ɹ↓↑": 301,
        "ɹ`ɹ↓": 302,
        "ts⁼ɹ": 303,
        "ts⁼ɹ→": 304,
        "ts⁼ɹ↑": 305,
        "ts⁼ɹ↓↑": 306,
        "ts⁼ɹ↓": 307,
        "tsʰɹ": 308,
        "tsʰɹ→": 309,
        "tsʰɹ↑": 310,
        "tsʰɹ↓↑": 311,
        "tsʰɹ↓": 312,
        "sɹ": 313,
        "sɹ→": 314,
        "sɹ↑": 315,
        "sɹ↓↑": 316,
        "sɹ↓": 317,

        "ɯ": 318,
        "e": 319,
        "aː": 320,
        "ɯː": 321,
        "eː": 322,
        "ç": 323,
        "ɸ": 324,
        "ɰᵝ": 325,
        "ɴ": 326,
        "g": 327,
        "dʑ": 328,
        "q": 329,
        "ː": 330,
        "bj": 331,
        "tɕ": 332,
        "dej": 333,
        "tej": 334,
        "gj": 335,
        "gɯ": 336,
        "çj": 337,
        "kj": 338,
        "kɯ": 339,
        "mj": 340,
        "nj": 341,
        "pj": 342,
        "ɾj": 343,
        "ɕ": 344,
        "tsɯ": 345,

        "ɐ": 346,
        "ɑ": 347,
        "ɒ": 348,
        "ɜ": 349,
        "ɫ": 350,
        "ʑ": 351,
        "ʲ": 352,

        "y": 353,
        "ø": 354,
        "œ": 355,
        "ʁ": 356,
        "̃": 357,
        "ɲ": 358,

        ":": 359,
        ";": 360,
        "'": 361,
        "…": 362
    }
}
diffrhythm/g2p/g2p_generation.py
ADDED
@@ -0,0 +1,133 @@
# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
import sys

from diffrhythm.g2p.g2p import PhonemeBpeTokenizer
from diffrhythm.g2p.utils.g2p import phonemizer_g2p
import tqdm
from typing import List
import json
import re


def ph_g2p(text, language):

    return phonemizer_g2p(text=text, language=language)


def g2p(text, sentence, language):

    return text_tokenizer.tokenize(text=text, sentence=sentence, language=language)


def is_chinese(char):
    if char >= "\u4e00" and char <= "\u9fa5":
        return True
    else:
        return False


def is_alphabet(char):
    if (char >= "\u0041" and char <= "\u005a") or (
        char >= "\u0061" and char <= "\u007a"
    ):
        return True
    else:
        return False


def is_other(char):
    if not (is_chinese(char) or is_alphabet(char)):
        return True
    else:
        return False


def get_segment(text: str) -> List[str]:
    # sentence --> [ch_part, en_part, ch_part, ...]
    segments = []
    types = []
    flag = 0
    temp_seg = ""
    temp_lang = ""

    # Determine the type of each character. type: blank, chinese, alphabet, number, unk and point.
    for i, ch in enumerate(text):
        if is_chinese(ch):
            types.append("zh")
        elif is_alphabet(ch):
            types.append("en")
        else:
            types.append("other")

    assert len(types) == len(text)

    for i in range(len(types)):
        # find the first char of the seg
        if flag == 0:
            temp_seg += text[i]
            temp_lang = types[i]
            flag = 1
        else:
            if temp_lang == "other":
                if types[i] == temp_lang:
                    temp_seg += text[i]
                else:
                    temp_seg += text[i]
                    temp_lang = types[i]
            else:
                if types[i] == temp_lang:
                    temp_seg += text[i]
                elif types[i] == "other":
                    temp_seg += text[i]
                else:
                    segments.append((temp_seg, temp_lang))
                    temp_seg = text[i]
                    temp_lang = types[i]
                    flag = 1

    segments.append((temp_seg, temp_lang))
    return segments
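
# Illustrative segmentation (per the rules above, "other" characters attach to the
# running segment rather than opening one of their own):
#
#     get_segment("你好hello, 世界")
#     # -> [("你好", "zh"), ("hello, ", "en"), ("世界", "zh")]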


def chn_eng_g2p(text: str):
    # now only en and ch
    segments = get_segment(text)
    all_phoneme = ""
    all_tokens = []

    for index in range(len(segments)):
        seg = segments[index]
        phoneme, token = g2p(seg[0], text, seg[1])
        all_phoneme += phoneme + "|"
        all_tokens += token

        if seg[1] == "en" and index == len(segments) - 1 and all_phoneme[-2] == "_":
            all_phoneme = all_phoneme[:-2]
            all_tokens = all_tokens[:-1]
    return all_phoneme, all_tokens


text_tokenizer = PhonemeBpeTokenizer()
with open("./diffrhythm/g2p/g2p/vocab.json", "r") as f:
    json_data = f.read()
data = json.loads(json_data)
vocab = data["vocab"]

if __name__ == '__main__':
    phone, token = chn_eng_g2p("你好,hello world")
    phone, token = chn_eng_g2p("你好,hello world, Bonjour, 테스트 해 보겠습니다, 五月雨緑")
    print(phone)
    print(token)

    # phone, token = text_tokenizer.tokenize("你好,hello world, Bonjour, 테스트 해 보겠습니다, 五月雨緑", "", "auto")
    phone, token = text_tokenizer.tokenize("緑", "", "auto")
    # phone, token = text_tokenizer.tokenize("आइए इसका परीक्षण करें", "", "auto")
    # phone, token = text_tokenizer.tokenize("आइए इसका परीक्षण करें", "", "other")
    print(phone)
    print(token)
diffrhythm/g2p/sources/bpmf_2_pinyin.txt
ADDED
@@ -0,0 +1,41 @@
b	ㄅ
p	ㄆ
m	ㄇ
f	ㄈ
d	ㄉ
t	ㄊ
n	ㄋ
l	ㄌ
g	ㄍ
k	ㄎ
h	ㄏ
j	ㄐ
q	ㄑ
x	ㄒ
zh	ㄓ
ch	ㄔ
sh	ㄕ
r	ㄖ
z	ㄗ
c	ㄘ
s	ㄙ
i	ㄧ
u	ㄨ
v	ㄩ
a	ㄚ
o	ㄛ
e	ㄜ
e	ㄝ
ai	ㄞ
ei	ㄟ
ao	ㄠ
ou	ㄡ
an	ㄢ
en	ㄣ
ang	ㄤ
eng	ㄥ
er	ㄦ
2	ˊ
3	ˇ
4	ˋ
0	˙
diffrhythm/g2p/sources/chinese_lexicon.txt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a3a7685d1c3e68eb2fa304bfc63e90c90c3c1a1948839a5b1b507b2131b3e2fb
size 14779443
diffrhythm/g2p/sources/g2p_chinese_model/config.json
ADDED
@@ -0,0 +1,819 @@
1 |
+
{
|
2 |
+
"_name_or_path": "/BERT-POLY-v2/pretrained_models/mini_bert",
|
3 |
+
"architectures": [
|
4 |
+
"BertPoly"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"directionality": "bidi",
|
9 |
+
"gradient_checkpointing": false,
|
10 |
+
"hidden_act": "gelu",
|
11 |
+
"hidden_dropout_prob": 0.1,
|
12 |
+
"hidden_size": 384,
|
13 |
+
"id2label": {
|
14 |
+
"0": "LABEL_0",
|
15 |
+
"1": "LABEL_1",
|
16 |
+
"2": "LABEL_2",
|
17 |
+
"3": "LABEL_3",
|
18 |
+
"4": "LABEL_4",
|
19 |
+
"5": "LABEL_5",
|
20 |
+
"6": "LABEL_6",
|
21 |
+
"7": "LABEL_7",
|
22 |
+
"8": "LABEL_8",
|
23 |
+
"9": "LABEL_9",
|
24 |
+
"10": "LABEL_10",
|
25 |
+
"11": "LABEL_11",
|
26 |
+
"12": "LABEL_12",
|
27 |
+
"13": "LABEL_13",
|
28 |
+
"14": "LABEL_14",
|
29 |
+
"15": "LABEL_15",
|
30 |
+
"16": "LABEL_16",
|
31 |
+
"17": "LABEL_17",
|
32 |
+
"18": "LABEL_18",
|
33 |
+
"19": "LABEL_19",
|
34 |
+
"20": "LABEL_20",
|
35 |
+
"21": "LABEL_21",
|
36 |
+
"22": "LABEL_22",
|
37 |
+
"23": "LABEL_23",
|
38 |
+
"24": "LABEL_24",
|
39 |
+
"25": "LABEL_25",
|
40 |
+
"26": "LABEL_26",
|
41 |
+
"27": "LABEL_27",
|
42 |
+
"28": "LABEL_28",
|
43 |
+
"29": "LABEL_29",
|
44 |
+
"30": "LABEL_30",
|
45 |
+
"31": "LABEL_31",
|
46 |
+
"32": "LABEL_32",
|
47 |
+
"33": "LABEL_33",
|
48 |
+
"34": "LABEL_34",
|
49 |
+
"35": "LABEL_35",
|
50 |
+
"36": "LABEL_36",
|
51 |
+
"37": "LABEL_37",
|
52 |
+
"38": "LABEL_38",
|
53 |
+
"39": "LABEL_39",
|
54 |
+
"40": "LABEL_40",
|
55 |
+
"41": "LABEL_41",
|
56 |
+
"42": "LABEL_42",
|
57 |
+
"43": "LABEL_43",
|
58 |
+
"44": "LABEL_44",
|
59 |
+
"45": "LABEL_45",
|
60 |
+
"46": "LABEL_46",
|
61 |
+
"47": "LABEL_47",
|
62 |
+
"48": "LABEL_48",
|
63 |
+
"49": "LABEL_49",
|
64 |
+
"50": "LABEL_50",
|
65 |
+
"51": "LABEL_51",
|
66 |
+
"52": "LABEL_52",
|
67 |
+
"53": "LABEL_53",
|
68 |
+
"54": "LABEL_54",
|
69 |
+
"55": "LABEL_55",
|
70 |
+
"56": "LABEL_56",
|
71 |
+
"57": "LABEL_57",
|
72 |
+
"58": "LABEL_58",
|
73 |
+
"59": "LABEL_59",
|
74 |
+
"60": "LABEL_60",
|
75 |
+
"61": "LABEL_61",
|
76 |
+
"62": "LABEL_62",
|
77 |
+
"63": "LABEL_63",
|
78 |
+
"64": "LABEL_64",
|
79 |
+
"65": "LABEL_65",
|
80 |
+
"66": "LABEL_66",
|
81 |
+
"67": "LABEL_67",
|
82 |
+
"68": "LABEL_68",
|
83 |
+
"69": "LABEL_69",
|
84 |
+
"70": "LABEL_70",
|
85 |
+
"71": "LABEL_71",
|
86 |
+
"72": "LABEL_72",
|
87 |
+
"73": "LABEL_73",
|
88 |
+
"74": "LABEL_74",
|
89 |
+
"75": "LABEL_75",
|
90 |
+
"76": "LABEL_76",
|
91 |
+
"77": "LABEL_77",
|
92 |
+
"78": "LABEL_78",
|
93 |
+
"79": "LABEL_79",
|
94 |
+
"80": "LABEL_80",
|
95 |
+
"81": "LABEL_81",
|
96 |
+
"82": "LABEL_82",
|
97 |
+
"83": "LABEL_83",
|
98 |
+
"84": "LABEL_84",
|
99 |
+
"85": "LABEL_85",
|
100 |
+
"86": "LABEL_86",
|
101 |
+
"87": "LABEL_87",
|
102 |
+
"88": "LABEL_88",
|
103 |
+
"89": "LABEL_89",
|
104 |
+
"90": "LABEL_90",
|
105 |
+
"91": "LABEL_91",
|
106 |
+
"92": "LABEL_92",
|
107 |
+
"93": "LABEL_93",
|
108 |
+
"94": "LABEL_94",
|
109 |
+
"95": "LABEL_95",
|
110 |
+
"96": "LABEL_96",
|
111 |
+
"97": "LABEL_97",
|
112 |
+
"98": "LABEL_98",
|
113 |
+
"99": "LABEL_99",
|
114 |
+
"100": "LABEL_100",
|
115 |
+
"101": "LABEL_101",
|
116 |
+
"102": "LABEL_102",
|
117 |
+
"103": "LABEL_103",
|
118 |
+
"104": "LABEL_104",
|
119 |
+
"105": "LABEL_105",
|
120 |
+
"106": "LABEL_106",
|
121 |
+
"107": "LABEL_107",
|
122 |
+
"108": "LABEL_108",
|
123 |
+
"109": "LABEL_109",
|
124 |
+
"110": "LABEL_110",
|
125 |
+
"111": "LABEL_111",
|
126 |
+
"112": "LABEL_112",
|
127 |
+
"113": "LABEL_113",
|
128 |
+
"114": "LABEL_114",
|
129 |
+
"115": "LABEL_115",
|
130 |
+
"116": "LABEL_116",
|
131 |
+
"117": "LABEL_117",
|
132 |
+
"118": "LABEL_118",
|
133 |
+
"119": "LABEL_119",
|
134 |
+
"120": "LABEL_120",
|
135 |
+
"121": "LABEL_121",
|
136 |
+
"122": "LABEL_122",
|
137 |
+
"123": "LABEL_123",
|
138 |
+
"124": "LABEL_124",
|
139 |
+
"125": "LABEL_125",
|
140 |
+
"126": "LABEL_126",
|
141 |
+
"127": "LABEL_127",
|
142 |
+
"128": "LABEL_128",
|
143 |
+
"129": "LABEL_129",
|
144 |
+
"130": "LABEL_130",
|
145 |
+
"131": "LABEL_131",
|
146 |
+
"132": "LABEL_132",
|
147 |
+
"133": "LABEL_133",
|
148 |
+
"134": "LABEL_134",
|
149 |
+
"135": "LABEL_135",
|
150 |
+
"136": "LABEL_136",
|
151 |
+
"137": "LABEL_137",
|
152 |
+
"138": "LABEL_138",
|
153 |
+
"139": "LABEL_139",
|
154 |
+
"140": "LABEL_140",
|
155 |
+
"141": "LABEL_141",
|
156 |
+
"142": "LABEL_142",
|
157 |
+
"143": "LABEL_143",
|
158 |
+
"144": "LABEL_144",
|
159 |
+
"145": "LABEL_145",
|
160 |
+
"146": "LABEL_146",
|
161 |
+
"147": "LABEL_147",
|
162 |
+
"148": "LABEL_148",
|
163 |
+
"149": "LABEL_149",
|
164 |
+
"150": "LABEL_150",
|
165 |
+
"151": "LABEL_151",
|
166 |
+
"152": "LABEL_152",
|
167 |
+
"153": "LABEL_153",
|
168 |
+
"154": "LABEL_154",
|
169 |
+
"155": "LABEL_155",
|
170 |
+
"156": "LABEL_156",
|
171 |
+
"157": "LABEL_157",
|
172 |
+
"158": "LABEL_158",
|
173 |
+
"159": "LABEL_159",
|
174 |
+
"160": "LABEL_160",
|
175 |
+
"161": "LABEL_161",
|
176 |
+
"162": "LABEL_162",
|
177 |
+
"163": "LABEL_163",
|
178 |
+
"164": "LABEL_164",
|
179 |
+
"165": "LABEL_165",
|
180 |
+
"166": "LABEL_166",
|
181 |
+
"167": "LABEL_167",
|
182 |
+
"168": "LABEL_168",
|
183 |
+
"169": "LABEL_169",
|
184 |
+
"170": "LABEL_170",
|
185 |
+
"171": "LABEL_171",
|
186 |
+
"172": "LABEL_172",
|
187 |
+
"173": "LABEL_173",
|
188 |
+
"174": "LABEL_174",
|
189 |
+
"175": "LABEL_175",
|
190 |
+
"176": "LABEL_176",
|
191 |
+
"177": "LABEL_177",
|
192 |
+
"178": "LABEL_178",
|
193 |
+
"179": "LABEL_179",
|
194 |
+
"180": "LABEL_180",
|
195 |
+
"181": "LABEL_181",
|
196 |
+
"182": "LABEL_182",
|
197 |
+
"183": "LABEL_183",
|
198 |
+
"184": "LABEL_184",
|
199 |
+
"185": "LABEL_185",
|
200 |
+
"186": "LABEL_186",
|
201 |
+
"187": "LABEL_187",
|
202 |
+
"188": "LABEL_188",
|
203 |
+
"189": "LABEL_189",
|
204 |
+
"190": "LABEL_190",
|
205 |
+
"191": "LABEL_191",
|
206 |
+
"192": "LABEL_192",
|
207 |
+
"193": "LABEL_193",
|
208 |
+
"194": "LABEL_194",
|
209 |
+
"195": "LABEL_195",
|
210 |
+
"196": "LABEL_196",
|
211 |
+
"197": "LABEL_197",
|
212 |
+
"198": "LABEL_198",
|
213 |
+
"199": "LABEL_199",
|
214 |
+
"200": "LABEL_200",
|
215 |
+
"201": "LABEL_201",
|
216 |
+
"202": "LABEL_202",
|
217 |
+
"203": "LABEL_203",
|
218 |
+
"204": "LABEL_204",
|
219 |
+
"205": "LABEL_205",
|
220 |
+
"206": "LABEL_206",
|
221 |
+
"207": "LABEL_207",
|
222 |
+
"208": "LABEL_208",
|
223 |
+
"209": "LABEL_209",
|
224 |
+
"210": "LABEL_210",
|
225 |
+
"211": "LABEL_211",
|
226 |
+
"212": "LABEL_212",
|
227 |
+
"213": "LABEL_213",
|
228 |
+
"214": "LABEL_214",
|
229 |
+
"215": "LABEL_215",
|
230 |
+
"216": "LABEL_216",
|
231 |
+
"217": "LABEL_217",
|
232 |
+
"218": "LABEL_218",
|
233 |
+
"219": "LABEL_219",
|
234 |
+
"220": "LABEL_220",
|
235 |
+
"221": "LABEL_221",
|
236 |
+
"222": "LABEL_222",
|
237 |
+
"223": "LABEL_223",
|
238 |
+
"224": "LABEL_224",
|
239 |
+
"225": "LABEL_225",
|
240 |
+
"226": "LABEL_226",
|
241 |
+
"227": "LABEL_227",
|
242 |
+
"228": "LABEL_228",
|
243 |
+
"229": "LABEL_229",
|
244 |
+
"230": "LABEL_230",
|
245 |
+
"231": "LABEL_231",
|
246 |
+
"232": "LABEL_232",
|
247 |
+
"233": "LABEL_233",
|
248 |
+
"234": "LABEL_234",
|
249 |
+
"235": "LABEL_235",
|
250 |
+
"236": "LABEL_236",
|
251 |
+
"237": "LABEL_237",
|
252 |
+
"238": "LABEL_238",
|
253 |
+
"239": "LABEL_239",
|
254 |
+
"240": "LABEL_240",
|
255 |
+
"241": "LABEL_241",
|
256 |
+
"242": "LABEL_242",
|
257 |
+
"243": "LABEL_243",
|
258 |
+
"244": "LABEL_244",
|
259 |
+
"245": "LABEL_245",
|
260 |
+
"246": "LABEL_246",
|
261 |
+
"247": "LABEL_247",
|
262 |
+
"248": "LABEL_248",
|
263 |
+
"249": "LABEL_249",
|
264 |
+
"250": "LABEL_250",
|
265 |
+
"251": "LABEL_251",
|
266 |
+
"252": "LABEL_252",
|
267 |
+
"253": "LABEL_253",
|
268 |
+
"254": "LABEL_254",
|
269 |
+
"255": "LABEL_255",
|
270 |
+
"256": "LABEL_256",
|
271 |
+
"257": "LABEL_257",
|
272 |
+
"258": "LABEL_258",
|
273 |
+
"259": "LABEL_259",
|
274 |
+
"260": "LABEL_260",
|
275 |
+
"261": "LABEL_261",
|
276 |
+
"262": "LABEL_262",
|
277 |
+
"263": "LABEL_263",
|
278 |
+
"264": "LABEL_264",
|
279 |
+
"265": "LABEL_265",
|
280 |
+
"266": "LABEL_266",
|
281 |
+
"267": "LABEL_267",
|
282 |
+
"268": "LABEL_268",
|
283 |
+
"269": "LABEL_269",
|
284 |
+
"270": "LABEL_270",
|
285 |
+
"271": "LABEL_271",
|
286 |
+
"272": "LABEL_272",
|
287 |
+
"273": "LABEL_273",
|
288 |
+
"274": "LABEL_274",
|
289 |
+
"275": "LABEL_275",
|
290 |
+
"276": "LABEL_276",
|
291 |
+
"277": "LABEL_277",
|
292 |
+
"278": "LABEL_278",
|
293 |
+
"279": "LABEL_279",
|
294 |
+
"280": "LABEL_280",
|
295 |
+
"281": "LABEL_281",
|
296 |
+
"282": "LABEL_282",
|
297 |
+
"283": "LABEL_283",
|
298 |
+
"284": "LABEL_284",
|
299 |
+
"285": "LABEL_285",
|
300 |
+
"286": "LABEL_286",
|
301 |
+
"287": "LABEL_287",
|
302 |
+
"288": "LABEL_288",
|
303 |
+
"289": "LABEL_289",
|
304 |
+
"290": "LABEL_290",
|
305 |
+
"291": "LABEL_291",
|
306 |
+
"292": "LABEL_292",
|
307 |
+
"293": "LABEL_293",
|
308 |
+
"294": "LABEL_294",
|
309 |
+
"295": "LABEL_295",
|
310 |
+
"296": "LABEL_296",
|
311 |
+
"297": "LABEL_297",
|
312 |
+
"298": "LABEL_298",
|
313 |
+
"299": "LABEL_299",
|
314 |
+
"300": "LABEL_300",
|
315 |
+
"301": "LABEL_301",
|
316 |
+
"302": "LABEL_302",
|
317 |
+
"303": "LABEL_303",
|
318 |
+
"304": "LABEL_304",
|
319 |
+
"305": "LABEL_305",
|
320 |
+
"306": "LABEL_306",
|
321 |
+
"307": "LABEL_307",
|
322 |
+
"308": "LABEL_308",
|
323 |
+
"309": "LABEL_309",
|
324 |
+
"310": "LABEL_310",
|
325 |
+
"311": "LABEL_311",
|
326 |
+
"312": "LABEL_312",
|
327 |
+
"313": "LABEL_313",
|
328 |
+
"314": "LABEL_314",
|
329 |
+
"315": "LABEL_315",
|
330 |
+
"316": "LABEL_316",
|
331 |
+
"317": "LABEL_317",
|
332 |
+
"318": "LABEL_318",
|
333 |
+
"319": "LABEL_319",
|
334 |
+
"320": "LABEL_320",
|
335 |
+
"321": "LABEL_321",
|
336 |
+
"322": "LABEL_322",
|
337 |
+
"323": "LABEL_323",
|
338 |
+
"324": "LABEL_324",
|
339 |
+
"325": "LABEL_325",
|
340 |
+
"326": "LABEL_326",
|
341 |
+
"327": "LABEL_327",
|
342 |
+
"328": "LABEL_328",
|
343 |
+
"329": "LABEL_329",
|
344 |
+
"330": "LABEL_330",
|
345 |
+
"331": "LABEL_331",
|
346 |
+
"332": "LABEL_332",
|
347 |
+
"333": "LABEL_333",
|
348 |
+
"334": "LABEL_334",
|
349 |
+
"335": "LABEL_335",
|
350 |
+
"336": "LABEL_336",
|
351 |
+
"337": "LABEL_337",
|
352 |
+
"338": "LABEL_338",
|
353 |
+
"339": "LABEL_339",
|
354 |
+
"340": "LABEL_340",
|
355 |
+
"341": "LABEL_341",
|
356 |
+
"342": "LABEL_342",
|
357 |
+
"343": "LABEL_343",
|
358 |
+
"344": "LABEL_344",
|
359 |
+
"345": "LABEL_345",
|
360 |
+
"346": "LABEL_346",
|
361 |
+
"347": "LABEL_347",
|
362 |
+
"348": "LABEL_348",
|
363 |
+
"349": "LABEL_349",
|
364 |
+
"350": "LABEL_350",
|
365 |
+
"351": "LABEL_351",
|
366 |
+
"352": "LABEL_352",
|
367 |
+
"353": "LABEL_353",
|
368 |
+
"354": "LABEL_354",
|
369 |
+
"355": "LABEL_355",
|
370 |
+
"356": "LABEL_356",
|
371 |
+
"357": "LABEL_357",
|
372 |
+
"358": "LABEL_358",
|
373 |
+
"359": "LABEL_359",
|
374 |
+
"360": "LABEL_360",
|
375 |
+
"361": "LABEL_361",
|
376 |
+
"362": "LABEL_362",
|
377 |
+
"363": "LABEL_363",
|
378 |
+
"364": "LABEL_364",
|
379 |
+
"365": "LABEL_365",
|
380 |
+
"366": "LABEL_366",
|
381 |
+
"367": "LABEL_367",
|
382 |
+
"368": "LABEL_368",
|
383 |
+
"369": "LABEL_369",
|
384 |
+
"370": "LABEL_370",
|
385 |
+
"371": "LABEL_371",
|
386 |
+
"372": "LABEL_372",
|
387 |
+
"373": "LABEL_373",
|
388 |
+
"374": "LABEL_374",
|
389 |
+
"375": "LABEL_375",
|
390 |
+
"376": "LABEL_376",
|
391 |
+
"377": "LABEL_377",
|
392 |
+
"378": "LABEL_378",
|
393 |
+
"379": "LABEL_379",
|
394 |
+
"380": "LABEL_380",
|
395 |
+
"381": "LABEL_381",
|
396 |
+
"382": "LABEL_382",
|
397 |
+
"383": "LABEL_383",
|
398 |
+
"384": "LABEL_384",
|
399 |
+
"385": "LABEL_385",
|
400 |
+
"386": "LABEL_386",
|
401 |
+
"387": "LABEL_387",
|
402 |
+
"388": "LABEL_388",
|
403 |
+
"389": "LABEL_389",
|
404 |
+
"390": "LABEL_390"
|
405 |
+
},
|
406 |
+
"initializer_range": 0.02,
|
407 |
+
"intermediate_size": 1536,
|
408 |
+
"label2id": {
|
409 |
+
"LABEL_0": 0,
|
410 |
+
"LABEL_1": 1,
|
411 |
+
"LABEL_10": 10,
|
412 |
+
"LABEL_100": 100,
|
413 |
+
"LABEL_101": 101,
|
414 |
+
"LABEL_102": 102,
|
415 |
+
"LABEL_103": 103,
|
416 |
+
"LABEL_104": 104,
|
417 |
+
"LABEL_105": 105,
|
418 |
+
"LABEL_106": 106,
|
419 |
+
"LABEL_107": 107,
|
420 |
+
"LABEL_108": 108,
|
421 |
+
"LABEL_109": 109,
|
422 |
+
"LABEL_11": 11,
|
423 |
+
"LABEL_110": 110,
|
424 |
+
"LABEL_111": 111,
|
425 |
+
"LABEL_112": 112,
|
426 |
+
"LABEL_113": 113,
|
427 |
+
"LABEL_114": 114,
|
428 |
+
"LABEL_115": 115,
|
429 |
+
"LABEL_116": 116,
|
430 |
+
"LABEL_117": 117,
|
431 |
+
"LABEL_118": 118,
|
432 |
+
"LABEL_119": 119,
|
433 |
+
"LABEL_12": 12,
|
434 |
+
"LABEL_120": 120,
|
435 |
+
"LABEL_121": 121,
|
436 |
+
"LABEL_122": 122,
|
437 |
+
"LABEL_123": 123,
|
438 |
+
"LABEL_124": 124,
|
439 |
+
"LABEL_125": 125,
|
440 |
+
"LABEL_126": 126,
|
441 |
+
"LABEL_127": 127,
|
442 |
+
"LABEL_128": 128,
|
443 |
+
"LABEL_129": 129,
|
444 |
+
"LABEL_13": 13,
|
445 |
+
"LABEL_130": 130,
|
446 |
+
"LABEL_131": 131,
|
447 |
+
"LABEL_132": 132,
|
448 |
+
"LABEL_133": 133,
|
449 |
+
"LABEL_134": 134,
|
450 |
+
"LABEL_135": 135,
|
451 |
+
"LABEL_136": 136,
|
452 |
+
"LABEL_137": 137,
|
453 |
+
"LABEL_138": 138,
|
454 |
+
"LABEL_139": 139,
|
455 |
+
"LABEL_14": 14,
|
456 |
+
"LABEL_140": 140,
|
457 |
+
"LABEL_141": 141,
|
458 |
+
"LABEL_142": 142,
|
459 |
+
"LABEL_143": 143,
|
460 |
+
"LABEL_144": 144,
|
461 |
+
"LABEL_145": 145,
|
462 |
+
"LABEL_146": 146,
|
463 |
+
"LABEL_147": 147,
|
464 |
+
"LABEL_148": 148,
|
465 |
+
"LABEL_149": 149,
|
466 |
+
"LABEL_15": 15,
|
467 |
+
"LABEL_150": 150,
|
468 |
+
"LABEL_151": 151,
|
469 |
+
"LABEL_152": 152,
|
470 |
+
"LABEL_153": 153,
|
471 |
+
"LABEL_154": 154,
|
472 |
+
"LABEL_155": 155,
|
473 |
+
"LABEL_156": 156,
|
474 |
+
"LABEL_157": 157,
|
475 |
+
"LABEL_158": 158,
|
476 |
+
"LABEL_159": 159,
|
477 |
+
"LABEL_16": 16,
|
478 |
+
"LABEL_160": 160,
|
479 |
+
"LABEL_161": 161,
|
480 |
+
"LABEL_162": 162,
|
481 |
+
"LABEL_163": 163,
|
482 |
+
"LABEL_164": 164,
|
483 |
+
"LABEL_165": 165,
|
484 |
+
"LABEL_166": 166,
|
485 |
+
"LABEL_167": 167,
|
486 |
+
"LABEL_168": 168,
|
487 |
+
"LABEL_169": 169,
|
488 |
+
"LABEL_17": 17,
|
489 |
+
"LABEL_170": 170,
|
490 |
+
"LABEL_171": 171,
|
491 |
+
"LABEL_172": 172,
|
492 |
+
"LABEL_173": 173,
|
493 |
+
"LABEL_174": 174,
|
494 |
+
"LABEL_175": 175,
|
495 |
+
"LABEL_176": 176,
|
496 |
+
"LABEL_177": 177,
|
497 |
+
"LABEL_178": 178,
|
498 |
+
"LABEL_179": 179,
|
499 |
+
"LABEL_18": 18,
|
500 |
+
"LABEL_180": 180,
|
501 |
+
"LABEL_181": 181,
|
502 |
+
"LABEL_182": 182,
|
503 |
+
"LABEL_183": 183,
|
504 |
+
"LABEL_184": 184,
|
505 |
+
"LABEL_185": 185,
|
506 |
+
"LABEL_186": 186,
|
507 |
+
"LABEL_187": 187,
|
508 |
+
"LABEL_188": 188,
|
509 |
+
"LABEL_189": 189,
|
510 |
+
"LABEL_19": 19,
|
511 |
+
"LABEL_190": 190,
|
512 |
+
"LABEL_191": 191,
|
513 |
+
"LABEL_192": 192,
|
514 |
+
"LABEL_193": 193,
|
515 |
+
"LABEL_194": 194,
|
516 |
+
"LABEL_195": 195,
|
517 |
+
"LABEL_196": 196,
|
518 |
+
"LABEL_197": 197,
|
519 |
+
"LABEL_198": 198,
|
520 |
+
"LABEL_199": 199,
|
521 |
+
"LABEL_2": 2,
|
522 |
+
"LABEL_20": 20,
|
523 |
+
"LABEL_200": 200,
|
524 |
+
"LABEL_201": 201,
|
525 |
+
"LABEL_202": 202,
|
526 |
+
"LABEL_203": 203,
|
527 |
+
"LABEL_204": 204,
|
528 |
+
"LABEL_205": 205,
|
529 |
+
"LABEL_206": 206,
|
530 |
+
"LABEL_207": 207,
|
531 |
+
"LABEL_208": 208,
|
532 |
+
"LABEL_209": 209,
|
533 |
+
"LABEL_21": 21,
|
534 |
+
"LABEL_210": 210,
|
535 |
+
"LABEL_211": 211,
|
536 |
+
"LABEL_212": 212,
|
537 |
+
"LABEL_213": 213,
|
538 |
+
"LABEL_214": 214,
|
539 |
+
"LABEL_215": 215,
|
540 |
+
"LABEL_216": 216,
|
541 |
+
"LABEL_217": 217,
|
542 |
+
"LABEL_218": 218,
|
543 |
+
"LABEL_219": 219,
|
544 |
+
"LABEL_22": 22,
|
545 |
+
"LABEL_220": 220,
|
546 |
+
"LABEL_221": 221,
|
547 |
+
"LABEL_222": 222,
|
548 |
+
"LABEL_223": 223,
|
549 |
+
"LABEL_224": 224,
|
550 |
+
"LABEL_225": 225,
|
551 |
+
"LABEL_226": 226,
|
552 |
+
"LABEL_227": 227,
|
553 |
+
"LABEL_228": 228,
|
554 |
+
"LABEL_229": 229,
|
555 |
+
"LABEL_23": 23,
|
556 |
+
"LABEL_230": 230,
|
557 |
+
"LABEL_231": 231,
|
558 |
+
"LABEL_232": 232,
|
559 |
+
"LABEL_233": 233,
|
560 |
+
"LABEL_234": 234,
|
561 |
+
"LABEL_235": 235,
|
562 |
+
"LABEL_236": 236,
|
563 |
+
"LABEL_237": 237,
|
564 |
+
"LABEL_238": 238,
|
565 |
+
"LABEL_239": 239,
|
566 |
+
"LABEL_24": 24,
|
567 |
+
"LABEL_240": 240,
|
568 |
+
"LABEL_241": 241,
|
569 |
+
"LABEL_242": 242,
|
570 |
+
"LABEL_243": 243,
|
571 |
+
"LABEL_244": 244,
|
572 |
+
"LABEL_245": 245,
|
573 |
+
"LABEL_246": 246,
|
574 |
+
"LABEL_247": 247,
|
575 |
+
"LABEL_248": 248,
|
576 |
+
"LABEL_249": 249,
|
577 |
+
"LABEL_25": 25,
|
578 |
+
"LABEL_250": 250,
|
579 |
+
"LABEL_251": 251,
|
580 |
+
"LABEL_252": 252,
|
581 |
+
"LABEL_253": 253,
|
582 |
+
"LABEL_254": 254,
|
583 |
+
"LABEL_255": 255,
|
584 |
+
"LABEL_256": 256,
|
585 |
+
"LABEL_257": 257,
|
586 |
+
"LABEL_258": 258,
|
587 |
+
"LABEL_259": 259,
|
588 |
+
"LABEL_26": 26,
|
589 |
+
"LABEL_260": 260,
|
590 |
+
"LABEL_261": 261,
|
591 |
+
"LABEL_262": 262,
|
592 |
+
"LABEL_263": 263,
|
593 |
+
"LABEL_264": 264,
|
594 |
+
"LABEL_265": 265,
|
595 |
+
"LABEL_266": 266,
|
596 |
+
"LABEL_267": 267,
|
597 |
+
"LABEL_268": 268,
|
598 |
+
"LABEL_269": 269,
|
599 |
+
"LABEL_27": 27,
|
600 |
+
"LABEL_270": 270,
|
601 |
+
"LABEL_271": 271,
|
602 |
+
"LABEL_272": 272,
|
603 |
+
"LABEL_273": 273,
|
604 |
+
"LABEL_274": 274,
|
605 |
+
"LABEL_275": 275,
|
606 |
+
"LABEL_276": 276,
|
607 |
+
"LABEL_277": 277,
|
608 |
+
"LABEL_278": 278,
|
609 |
+
"LABEL_279": 279,
|
610 |
+
"LABEL_28": 28,
|
611 |
+
"LABEL_280": 280,
|
612 |
+
"LABEL_281": 281,
|
613 |
+
"LABEL_282": 282,
|
614 |
+
"LABEL_283": 283,
|
615 |
+
"LABEL_284": 284,
|
616 |
+
"LABEL_285": 285,
|
617 |
+
"LABEL_286": 286,
|
618 |
+
"LABEL_287": 287,
|
619 |
+
"LABEL_288": 288,
|
620 |
+
"LABEL_289": 289,
|
621 |
+
"LABEL_29": 29,
|
622 |
+
"LABEL_290": 290,
|
623 |
+
"LABEL_291": 291,
|
624 |
+
"LABEL_292": 292,
|
625 |
+
"LABEL_293": 293,
|
626 |
+
"LABEL_294": 294,
|
627 |
+
"LABEL_295": 295,
|
628 |
+
"LABEL_296": 296,
|
629 |
+
"LABEL_297": 297,
|
630 |
+
"LABEL_298": 298,
|
631 |
+
"LABEL_299": 299,
|
632 |
+
"LABEL_3": 3,
|
633 |
+
"LABEL_30": 30,
|
634 |
+
"LABEL_300": 300,
|
635 |
+
"LABEL_301": 301,
|
636 |
+
"LABEL_302": 302,
|
637 |
+
"LABEL_303": 303,
|
638 |
+
"LABEL_304": 304,
|
639 |
+
"LABEL_305": 305,
|
640 |
+
"LABEL_306": 306,
|
641 |
+
"LABEL_307": 307,
|
642 |
+
"LABEL_308": 308,
|
643 |
+
"LABEL_309": 309,
|
644 |
+
"LABEL_31": 31,
|
645 |
+
"LABEL_310": 310,
|
646 |
+
"LABEL_311": 311,
|
647 |
+
"LABEL_312": 312,
|
648 |
+
"LABEL_313": 313,
|
649 |
+
"LABEL_314": 314,
|
650 |
+
"LABEL_315": 315,
|
651 |
+
"LABEL_316": 316,
|
652 |
+
"LABEL_317": 317,
|
653 |
+
"LABEL_318": 318,
|
654 |
+
"LABEL_319": 319,
|
655 |
+
"LABEL_32": 32,
|
656 |
+
"LABEL_320": 320,
|
657 |
+
"LABEL_321": 321,
|
658 |
+
"LABEL_322": 322,
|
659 |
+
"LABEL_323": 323,
|
660 |
+
"LABEL_324": 324,
|
661 |
+
"LABEL_325": 325,
|
662 |
+
"LABEL_326": 326,
|
663 |
+
"LABEL_327": 327,
|
664 |
+
"LABEL_328": 328,
|
665 |
+
"LABEL_329": 329,
|
666 |
+
"LABEL_33": 33,
|
667 |
+
"LABEL_330": 330,
|
668 |
+
"LABEL_331": 331,
|
669 |
+
"LABEL_332": 332,
|
670 |
+
"LABEL_333": 333,
|
671 |
+
"LABEL_334": 334,
|
672 |
+
"LABEL_335": 335,
|
673 |
+
"LABEL_336": 336,
|
674 |
+
"LABEL_337": 337,
|
675 |
+
"LABEL_338": 338,
|
676 |
+
"LABEL_339": 339,
|
677 |
+
"LABEL_34": 34,
|
678 |
+
"LABEL_340": 340,
|
679 |
+
"LABEL_341": 341,
|
680 |
+
"LABEL_342": 342,
|
681 |
+
"LABEL_343": 343,
|
682 |
+
"LABEL_344": 344,
|
683 |
+
"LABEL_345": 345,
|
684 |
+
"LABEL_346": 346,
|
685 |
+
"LABEL_347": 347,
|
686 |
+
"LABEL_348": 348,
|
687 |
+
"LABEL_349": 349,
|
688 |
+
"LABEL_35": 35,
|
689 |
+
"LABEL_350": 350,
|
690 |
+
"LABEL_351": 351,
|
691 |
+
"LABEL_352": 352,
|
692 |
+
"LABEL_353": 353,
|
693 |
+
"LABEL_354": 354,
|
694 |
+
"LABEL_355": 355,
|
695 |
+
"LABEL_356": 356,
|
696 |
+
"LABEL_357": 357,
|
697 |
+
"LABEL_358": 358,
|
698 |
+
"LABEL_359": 359,
|
699 |
+
"LABEL_36": 36,
|
700 |
+
"LABEL_360": 360,
|
701 |
+
"LABEL_361": 361,
|
702 |
+
"LABEL_362": 362,
|
703 |
+
"LABEL_363": 363,
|
704 |
+
"LABEL_364": 364,
|
705 |
+
"LABEL_365": 365,
|
706 |
+
"LABEL_366": 366,
|
707 |
+
"LABEL_367": 367,
|
708 |
+
"LABEL_368": 368,
|
709 |
+
"LABEL_369": 369,
|
710 |
+
"LABEL_37": 37,
|
711 |
+
"LABEL_370": 370,
|
712 |
+
"LABEL_371": 371,
|
713 |
+
"LABEL_372": 372,
|
714 |
+
"LABEL_373": 373,
|
715 |
+
"LABEL_374": 374,
|
716 |
+
"LABEL_375": 375,
|
717 |
+
"LABEL_376": 376,
|
718 |
+
"LABEL_377": 377,
|
719 |
+
"LABEL_378": 378,
|
720 |
+
"LABEL_379": 379,
|
721 |
+
"LABEL_38": 38,
|
722 |
+
"LABEL_380": 380,
|
723 |
+
"LABEL_381": 381,
|
724 |
+
"LABEL_382": 382,
|
725 |
+
"LABEL_383": 383,
|
726 |
+
"LABEL_384": 384,
|
727 |
+
"LABEL_385": 385,
|
728 |
+
"LABEL_386": 386,
|
729 |
+
"LABEL_387": 387,
|
730 |
+
"LABEL_388": 388,
|
731 |
+
"LABEL_389": 389,
|
732 |
+
"LABEL_39": 39,
|
733 |
+
"LABEL_390": 390,
|
734 |
+
"LABEL_4": 4,
|
735 |
+
"LABEL_40": 40,
|
736 |
+
"LABEL_41": 41,
|
737 |
+
"LABEL_42": 42,
|
738 |
+
"LABEL_43": 43,
|
739 |
+
"LABEL_44": 44,
|
740 |
+
"LABEL_45": 45,
|
741 |
+
"LABEL_46": 46,
|
742 |
+
"LABEL_47": 47,
|
743 |
+
"LABEL_48": 48,
|
744 |
+
"LABEL_49": 49,
|
745 |
+
"LABEL_5": 5,
|
746 |
+
"LABEL_50": 50,
|
747 |
+
"LABEL_51": 51,
|
748 |
+
"LABEL_52": 52,
|
749 |
+
"LABEL_53": 53,
|
750 |
+
"LABEL_54": 54,
|
751 |
+
"LABEL_55": 55,
|
752 |
+
"LABEL_56": 56,
|
753 |
+
"LABEL_57": 57,
|
754 |
+
"LABEL_58": 58,
|
755 |
+
"LABEL_59": 59,
|
756 |
+
"LABEL_6": 6,
|
757 |
+
"LABEL_60": 60,
|
758 |
+
"LABEL_61": 61,
|
759 |
+
"LABEL_62": 62,
|
760 |
+
"LABEL_63": 63,
|
761 |
+
"LABEL_64": 64,
|
762 |
+
"LABEL_65": 65,
|
763 |
+
"LABEL_66": 66,
|
764 |
+
"LABEL_67": 67,
|
765 |
+
"LABEL_68": 68,
|
766 |
+
"LABEL_69": 69,
|
767 |
+
"LABEL_7": 7,
|
768 |
+
"LABEL_70": 70,
|
769 |
+
"LABEL_71": 71,
|
770 |
+
"LABEL_72": 72,
|
771 |
+
"LABEL_73": 73,
|
772 |
+
"LABEL_74": 74,
|
773 |
+
"LABEL_75": 75,
|
774 |
+
"LABEL_76": 76,
|
775 |
+
"LABEL_77": 77,
|
776 |
+
"LABEL_78": 78,
|
777 |
+
"LABEL_79": 79,
|
778 |
+
"LABEL_8": 8,
|
779 |
+
"LABEL_80": 80,
|
780 |
+
"LABEL_81": 81,
|
781 |
+
"LABEL_82": 82,
|
782 |
+
"LABEL_83": 83,
|
783 |
+
"LABEL_84": 84,
|
784 |
+
"LABEL_85": 85,
|
785 |
+
"LABEL_86": 86,
|
786 |
+
"LABEL_87": 87,
|
787 |
+
"LABEL_88": 88,
|
788 |
+
"LABEL_89": 89,
|
789 |
+
"LABEL_9": 9,
|
790 |
+
"LABEL_90": 90,
|
791 |
+
"LABEL_91": 91,
|
792 |
+
"LABEL_92": 92,
|
793 |
+
"LABEL_93": 93,
|
794 |
+
"LABEL_94": 94,
|
795 |
+
"LABEL_95": 95,
|
796 |
+
"LABEL_96": 96,
|
797 |
+
"LABEL_97": 97,
|
798 |
+
"LABEL_98": 98,
|
799 |
+
"LABEL_99": 99
|
800 |
+
},
|
801 |
+
"layer_norm_eps": 1e-12,
|
802 |
+
"max_position_embeddings": 512,
|
803 |
+
"model_type": "bert",
|
804 |
+
"num_attention_heads": 12,
|
805 |
+
"num_hidden_layers": 6,
|
806 |
+
"num_relation_heads": 32,
|
807 |
+
"pad_token_id": 0,
|
808 |
+
"pooler_fc_size": 768,
|
809 |
+
"pooler_num_attention_heads": 12,
|
810 |
+
"pooler_num_fc_layers": 3,
|
811 |
+
"pooler_size_per_head": 128,
|
812 |
+
"pooler_type": "first_token_transform",
|
813 |
+
"position_embedding_type": "absolute",
|
814 |
+
"torch_dtype": "float32",
|
815 |
+
"transformers_version": "4.44.1",
|
816 |
+
"type_vocab_size": 2,
|
817 |
+
"use_cache": true,
|
818 |
+
"vocab_size": 21128
|
819 |
+
}
|
diffrhythm/g2p/sources/g2p_chinese_model/poly_bert_model.onnx
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8765d835ffdf9811c832d4dc7b6a552757aa8615c01d1184db716a50c20aebbc
size 76583333
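poly_bert_model.onnx is the exported polyphone classifier described by config.json above (a 6-layer, 384-hidden BERT variant over the 391 pronunciation classes enumerated in polydict.json below). A minimal sketch of loading it with onnxruntime; the input names and shapes here are assumptions, not read from the actual graph, so inspect session.get_inputs() for the real ones:

import onnxruntime as ort
import numpy as np

session = ort.InferenceSession(
    "diffrhythm/g2p/sources/g2p_chinese_model/poly_bert_model.onnx"
)

# Hypothetical feed names/shapes; query session.get_inputs() to confirm.
inputs = {
    "input_ids": np.zeros((1, 32), dtype=np.int64),
    "attention_mask": np.ones((1, 32), dtype=np.int64),
}
outputs = session.run(None, inputs)
print([o.shape for o in outputs])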
diffrhythm/g2p/sources/g2p_chinese_model/polychar.txt
ADDED
@@ -0,0 +1,159 @@
丧
中
为
乌
乐
了
什
仔
令
任
会
传
佛
供
便
倒
假
兴
冠
冲
几
分
切
划
创
剥
勒
区
华
单
卜
占
卡
卷
厦
参
发
只
号
同
吐
和
喝
圈
地
塞
壳
处
奇
奔
好
宁
宿
将
少
尽
岗
差
巷
帖
干
应
度
弹
强
当
待
得
恶
扁
扇
扎
扫
担
挑
据
撒
教
散
数
斗
晃
曝
曲
更
曾
朝
朴
杆
查
校
模
横
没
泡
济
混
漂
炸
熟
燕
片
率
畜
的
盛
相
省
看
着
矫
禁
种
称
空
答
粘
糊
系
累
纤
结
给
缝
肖
背
脏
舍
色
落
蒙
薄
藏
血
行
要
观
觉
角
解
说
调
踏
车
转
载
还
遂
都
重
量
钻
铺
长
间
降
难
露
鲜
diffrhythm/g2p/sources/g2p_chinese_model/polydict.json
ADDED
@@ -0,0 +1,393 @@
{
  "1": "丧{sang1}",
  "2": "丧{sang4}",
  "3": "中{zhong1}",
  "4": "中{zhong4}",
  "5": "为{wei2}",
  "6": "为{wei4}",
  "7": "乌{wu1}",
  "8": "乌{wu4}",
  "9": "乐{lao4}",
  "10": "乐{le4}",
  "11": "乐{le5}",
  "12": "乐{yao4}",
  "13": "乐{yve4}",
  "14": "了{le5}",
  "15": "了{liao3}",
  "16": "了{liao5}",
  "17": "什{shen2}",
  "18": "什{shi2}",
  "19": "仔{zai3}",
  "20": "仔{zai5}",
  "21": "仔{zi3}",
  "22": "仔{zi5}",
  "23": "令{ling2}",
  "24": "令{ling4}",
  "25": "任{ren2}",
  "26": "任{ren4}",
  "27": "会{hui4}",
  "28": "会{hui5}",
  "29": "会{kuai4}",
  "30": "传{chuan2}",
  "31": "传{zhuan4}",
  "32": "佛{fo2}",
  "33": "佛{fu2}",
  "34": "供{gong1}",
  "35": "供{gong4}",
  "36": "便{bian4}",
  "37": "便{pian2}",
  "38": "倒{dao3}",
  "39": "倒{dao4}",
  "40": "假{jia3}",
  "41": "假{jia4}",
  "42": "兴{xing1}",
  "43": "兴{xing4}",
  "44": "冠{guan1}",
  "45": "冠{guan4}",
  "46": "冲{chong1}",
  "47": "冲{chong4}",
  "48": "几{ji1}",
  "49": "几{ji2}",
  "50": "几{ji3}",
  "51": "分{fen1}",
  "52": "分{fen4}",
  "53": "分{fen5}",
  "54": "切{qie1}",
  "55": "切{qie4}",
  "56": "划{hua2}",
  "57": "划{hua4}",
  "58": "划{hua5}",
  "59": "创{chuang1}",
  "60": "创{chuang4}",
  "61": "剥{bao1}",
  "62": "剥{bo1}",
  "63": "勒{le4}",
  "64": "勒{le5}",
  "65": "勒{lei1}",
  "66": "区{ou1}",
  "67": "区{qu1}",
  "68": "华{hua2}",
  "69": "华{hua4}",
  "70": "单{chan2}",
  "71": "单{dan1}",
  "72": "单{shan4}",
  "73": "卜{bo5}",
  "74": "卜{bu3}",
  "75": "占{zhan1}",
  "76": "占{zhan4}",
  "77": "卡{ka2}",
  "78": "卡{ka3}",
  "79": "卡{qia3}",
  "80": "卷{jvan3}",
  "81": "卷{jvan4}",
  "82": "厦{sha4}",
  "83": "厦{xia4}",
  "84": "参{can1}",
  "85": "参{cen1}",
  "86": "参{shen1}",
  "87": "发{fa1}",
  "88": "发{fa4}",
  "89": "发{fa5}",
  "90": "只{zhi1}",
  "91": "只{zhi3}",
  "92": "号{hao2}",
  "93": "号{hao4}",
  "94": "号{hao5}",
  "95": "同{tong2}",
  "96": "同{tong4}",
  "97": "同{tong5}",
  "98": "吐{tu2}",
  "99": "吐{tu3}",
  "100": "吐{tu4}",
  "101": "和{he2}",
  "102": "和{he4}",
  "103": "和{he5}",
  "104": "和{huo2}",
  "105": "和{huo4}",
  "106": "和{huo5}",
  "107": "喝{he1}",
  "108": "喝{he4}",
  "109": "圈{jvan4}",
  "110": "圈{qvan1}",
  "111": "圈{qvan5}",
  "112": "地{de5}",
  "113": "地{di4}",
  "114": "地{di5}",
  "115": "塞{sai1}",
  "116": "塞{sai2}",
  "117": "塞{sai4}",
  "118": "塞{se4}",
  "119": "壳{ke2}",
  "120": "壳{qiao4}",
  "121": "处{chu3}",
  "122": "处{chu4}",
  "123": "奇{ji1}",
  "124": "奇{qi2}",
  "125": "奔{ben1}",
  "126": "奔{ben4}",
  "127": "好{hao3}",
  "128": "好{hao4}",
  "129": "好{hao5}",
  "130": "宁{ning2}",
  "131": "宁{ning4}",
  "132": "宁{ning5}",
  "133": "宿{su4}",
  "134": "宿{xiu3}",
  "135": "宿{xiu4}",
  "136": "将{jiang1}",
  "137": "将{jiang4}",
  "138": "少{shao3}",
  "139": "少{shao4}",
  "140": "尽{jin3}",
  "141": "尽{jin4}",
  "142": "岗{gang1}",
  "143": "岗{gang3}",
  "144": "差{cha1}",
  "145": "差{cha4}",
  "146": "差{chai1}",
  "147": "差{ci1}",
  "148": "巷{hang4}",
  "149": "巷{xiang4}",
  "150": "帖{tie1}",
  "151": "帖{tie3}",
  "152": "帖{tie4}",
  "153": "干{gan1}",
  "154": "干{gan4}",
  "155": "应{ying1}",
  "156": "应{ying4}",
  "157": "应{ying5}",
  "158": "度{du4}",
  "159": "度{du5}",
  "160": "度{duo2}",
  "161": "弹{dan4}",
  "162": "弹{tan2}",
  "163": "弹{tan5}",
  "164": "强{jiang4}",
  "165": "强{qiang2}",
  "166": "强{qiang3}",
  "167": "当{dang1}",
  "168": "当{dang4}",
  "169": "当{dang5}",
  "170": "待{dai1}",
  "171": "待{dai4}",
  "172": "得{de2}",
  "173": "得{de5}",
  "174": "得{dei3}",
  "175": "得{dei5}",
  "176": "恶{e3}",
  "177": "恶{e4}",
  "178": "恶{wu4}",
  "179": "扁{bian3}",
  "180": "扁{pian1}",
  "181": "扇{shan1}",
  "182": "扇{shan4}",
  "183": "扎{za1}",
  "184": "扎{zha1}",
  "185": "扎{zha2}",
  "186": "扫{sao3}",
  "187": "扫{sao4}",
  "188": "担{dan1}",
  "189": "担{dan4}",
  "190": "担{dan5}",
  "191": "挑{tiao1}",
  "192": "挑{tiao3}",
  "193": "据{jv1}",
  "194": "据{jv4}",
  "195": "撒{sa1}",
  "196": "撒{sa3}",
  "197": "撒{sa5}",
  "198": "教{jiao1}",
  "199": "教{jiao4}",
  "200": "散{san3}",
  "201": "散{san4}",
  "202": "散{san5}",
  "203": "数{shu3}",
  "204": "数{shu4}",
  "205": "数{shu5}",
  "206": "斗{dou3}",
  "207": "斗{dou4}",
  "208": "晃{huang3}",
  "209": "曝{bao4}",
  "210": "曲{qu1}",
  "211": "曲{qu3}",
  "212": "更{geng1}",
  "213": "更{geng4}",
  "214": "曾{ceng1}",
  "215": "曾{ceng2}",
  "216": "曾{zeng1}",
  "217": "朝{chao2}",
  "218": "朝{zhao1}",
  "219": "朴{piao2}",
  "220": "朴{pu2}",
  "221": "朴{pu3}",
  "222": "杆{gan1}",
  "223": "杆{gan3}",
  "224": "查{cha2}",
  "225": "查{zha1}",
  "226": "校{jiao4}",
  "227": "校{xiao4}",
  "228": "模{mo2}",
  "229": "模{mu2}",
  "230": "横{heng2}",
  "231": "横{heng4}",
  "232": "没{mei2}",
  "233": "没{mo4}",
  "234": "泡{pao1}",
  "235": "泡{pao4}",
  "236": "泡{pao5}",
  "237": "济{ji3}",
  "238": "济{ji4}",
  "239": "混{hun2}",
  "240": "混{hun3}",
  "241": "混{hun4}",
  "242": "混{hun5}",
  "243": "漂{piao1}",
  "244": "漂{piao3}",
  "245": "漂{piao4}",
  "246": "炸{zha2}",
  "247": "炸{zha4}",
  "248": "熟{shou2}",
  "249": "熟{shu2}",
  "250": "燕{yan1}",
  "251": "燕{yan4}",
  "252": "片{pian1}",
  "253": "片{pian4}",
  "254": "率{lv4}",
  "255": "率{shuai4}",
  "256": "畜{chu4}",
  "257": "畜{xu4}",
  "258": "的{de5}",
  "259": "的{di1}",
  "260": "的{di2}",
  "261": "的{di4}",
  "262": "的{di5}",
  "263": "盛{cheng2}",
  "264": "盛{sheng4}",
  "265": "相{xiang1}",
  "266": "相{xiang4}",
  "267": "相{xiang5}",
  "268": "省{sheng3}",
  "269": "省{xing3}",
  "270": "看{kan1}",
  "271": "看{kan4}",
  "272": "看{kan5}",
  "273": "着{zhao1}",
  "274": "着{zhao2}",
  "275": "着{zhao5}",
  "276": "着{zhe5}",
  "277": "着{zhuo2}",
  "278": "着{zhuo5}",
  "279": "矫{jiao3}",
  "280": "禁{jin1}",
  "281": "禁{jin4}",
  "282": "种{zhong3}",
  "283": "种{zhong4}",
  "284": "称{chen4}",
  "285": "称{cheng1}",
  "286": "空{kong1}",
  "287": "空{kong4}",
  "288": "答{da1}",
  "289": "答{da2}",
  "290": "粘{nian2}",
  "291": "粘{zhan1}",
  "292": "糊{hu2}",
  "293": "糊{hu5}",
  "294": "系{ji4}",
  "295": "系{xi4}",
  "296": "系{xi5}",
  "297": "累{lei2}",
  "298": "累{lei3}",
  "299": "累{lei4}",
  "300": "累{lei5}",
  "301": "纤{qian4}",
  "302": "纤{xian1}",
  "303": "结{jie1}",
  "304": "结{jie2}",
  "305": "结{jie5}",
  "306": "给{gei3}",
  "307": "给{gei5}",
  "308": "给{ji3}",
  "309": "缝{feng2}",
  "310": "缝{feng4}",
  "311": "缝{feng5}",
  "312": "肖{xiao1}",
  "313": "肖{xiao4}",
  "314": "背{bei1}",
  "315": "背{bei4}",
  "316": "脏{zang1}",
  "317": "脏{zang4}",
  "318": "舍{she3}",
  "319": "舍{she4}",
  "320": "色{se4}",
  "321": "色{shai3}",
  "322": "落{lao4}",
  "323": "落{luo4}",
  "324": "蒙{meng1}",
  "325": "蒙{meng2}",
  "326": "蒙{meng3}",
  "327": "薄{bao2}",
  "328": "薄{bo2}",
  "329": "薄{bo4}",
  "330": "藏{cang2}",
  "331": "藏{zang4}",
  "332": "血{xie3}",
  "333": "血{xue4}",
  "334": "行{hang2}",
  "335": "行{hang5}",
  "336": "行{heng5}",
  "337": "行{xing2}",
  "338": "行{xing4}",
  "339": "要{yao1}",
  "340": "要{yao4}",
  "341": "观{guan1}",
  "342": "观{guan4}",
  "343": "觉{jiao4}",
  "344": "觉{jiao5}",
  "345": "觉{jve2}",
  "346": "角{jiao3}",
  "347": "角{jve2}",
  "348": "解{jie3}",
  "349": "解{jie4}",
  "350": "解{xie4}",
  "351": "说{shui4}",
  "352": "说{shuo1}",
  "353": "调{diao4}",
  "354": "调{tiao2}",
  "355": "踏{ta1}",
  "356": "踏{ta4}",
  "357": "车{che1}",
  "358": "车{jv1}",
  "359": "转{zhuan3}",
  "360": "转{zhuan4}",
  "361": "载{zai3}",
  "362": "载{zai4}",
  "363": "还{hai2}",
  "364": "还{huan2}",
  "365": "遂{sui2}",
  "366": "遂{sui4}",
  "367": "都{dou1}",
  "368": "都{du1}",
  "369": "重{chong2}",
  "370": "重{zhong4}",
  "371": "量{liang2}",
  "372": "量{liang4}",
  "373": "量{liang5}",
  "374": "钻{zuan1}",
  "375": "钻{zuan4}",
  "376": "铺{pu1}",
  "377": "铺{pu4}",
  "378": "长{chang2}",
  "379": "长{chang3}",
  "380": "长{zhang3}",
  "381": "间{jian1}",
  "382": "间{jian4}",
  "383": "降{jiang4}",
  "384": "降{xiang2}",
  "385": "难{nan2}",
  "386": "难{nan4}",
  "387": "难{nan5}",
  "388": "露{lou4}",
  "389": "露{lu4}",
  "390": "鲜{xian1}",
  "391": "鲜{xian3}"
}
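polydict.json maps the classifier's output class indices to "character{pinyin}" labels for the 159 polyphonic characters in polychar.txt (its companion polydict_r.json below is the exact inverse mapping). A minimal sketch of decoding a predicted class index into (character, pinyin); the function name and regex are illustrative, not from the repo:

import json
import re

with open("diffrhythm/g2p/sources/g2p_chinese_model/polydict.json", encoding="utf-8") as f:
    id_to_label = json.load(f)

def decode_polyphone(class_id: int):
    """Split a label like '和{he2}' into (character, pinyin)."""
    match = re.fullmatch(r"(.)\{(.+?)\}", id_to_label[str(class_id)])
    return match.group(1), match.group(2)

print(decode_polyphone(101))  # -> ('和', 'he2')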
diffrhythm/g2p/sources/g2p_chinese_model/polydict_r.json
ADDED
@@ -0,0 +1,393 @@
{
  "丧{sang1}": 1,
  "丧{sang4}": 2,
  "中{zhong1}": 3,
  "中{zhong4}": 4,
  "为{wei2}": 5,
  "为{wei4}": 6,
  "乌{wu1}": 7,
  "乌{wu4}": 8,
  "乐{lao4}": 9,
  "乐{le4}": 10,
  "乐{le5}": 11,
  "乐{yao4}": 12,
  "乐{yve4}": 13,
  "了{le5}": 14,
  "了{liao3}": 15,
  "了{liao5}": 16,
  "什{shen2}": 17,
  "什{shi2}": 18,
  "仔{zai3}": 19,
  "仔{zai5}": 20,
  "仔{zi3}": 21,
  "仔{zi5}": 22,
  "令{ling2}": 23,
  "令{ling4}": 24,
  "任{ren2}": 25,
  "任{ren4}": 26,
  "会{hui4}": 27,
  "会{hui5}": 28,
  "会{kuai4}": 29,
  "传{chuan2}": 30,
  "传{zhuan4}": 31,
  "佛{fo2}": 32,
  "佛{fu2}": 33,
  "供{gong1}": 34,
  "供{gong4}": 35,
  "便{bian4}": 36,
  "便{pian2}": 37,
  "倒{dao3}": 38,
  "倒{dao4}": 39,
  "假{jia3}": 40,
  "假{jia4}": 41,
  "兴{xing1}": 42,
  "兴{xing4}": 43,
  "冠{guan1}": 44,
  "冠{guan4}": 45,
  "冲{chong1}": 46,
  "冲{chong4}": 47,
  "几{ji1}": 48,
  "几{ji2}": 49,
  "几{ji3}": 50,
  "分{fen1}": 51,
  "分{fen4}": 52,
  "分{fen5}": 53,
  "切{qie1}": 54,
  "切{qie4}": 55,
  "划{hua2}": 56,
  "划{hua4}": 57,
  "划{hua5}": 58,
  "创{chuang1}": 59,
  "创{chuang4}": 60,
  "剥{bao1}": 61,
  "剥{bo1}": 62,
  "勒{le4}": 63,
  "勒{le5}": 64,
  "勒{lei1}": 65,
  "区{ou1}": 66,
  "区{qu1}": 67,
  "华{hua2}": 68,
  "华{hua4}": 69,
  "单{chan2}": 70,
  "单{dan1}": 71,
  "单{shan4}": 72,
  "卜{bo5}": 73,
  "卜{bu3}": 74,
  "占{zhan1}": 75,
  "占{zhan4}": 76,
  "卡{ka2}": 77,
  "卡{ka3}": 78,
  "卡{qia3}": 79,
  "卷{jvan3}": 80,
  "卷{jvan4}": 81,
  "厦{sha4}": 82,
  "厦{xia4}": 83,
  "参{can1}": 84,
  "参{cen1}": 85,
  "参{shen1}": 86,
  "发{fa1}": 87,
  "发{fa4}": 88,
  "发{fa5}": 89,
  "只{zhi1}": 90,
  "只{zhi3}": 91,
  "号{hao2}": 92,
  "号{hao4}": 93,
  "号{hao5}": 94,
  "同{tong2}": 95,
  "同{tong4}": 96,
  "同{tong5}": 97,
  "吐{tu2}": 98,
  "吐{tu3}": 99,
  "吐{tu4}": 100,
  "和{he2}": 101,
  "和{he4}": 102,
  "和{he5}": 103,
  "和{huo2}": 104,
  "和{huo4}": 105,
  "和{huo5}": 106,
  "喝{he1}": 107,
  "喝{he4}": 108,
  "圈{jvan4}": 109,
  "圈{qvan1}": 110,
  "圈{qvan5}": 111,
  "地{de5}": 112,
  "地{di4}": 113,
  "地{di5}": 114,
  "塞{sai1}": 115,
  "塞{sai2}": 116,
  "塞{sai4}": 117,
  "塞{se4}": 118,
  "壳{ke2}": 119,
  "壳{qiao4}": 120,
  "处{chu3}": 121,
  "处{chu4}": 122,
  "奇{ji1}": 123,
  "奇{qi2}": 124,
  "奔{ben1}": 125,
  "奔{ben4}": 126,
  "好{hao3}": 127,
  "好{hao4}": 128,
  "好{hao5}": 129,
  "宁{ning2}": 130,
  "宁{ning4}": 131,
  "宁{ning5}": 132,
  "宿{su4}": 133,
  "宿{xiu3}": 134,
  "宿{xiu4}": 135,
  "将{jiang1}": 136,
  "将{jiang4}": 137,
  "少{shao3}": 138,
  "少{shao4}": 139,
  "尽{jin3}": 140,
  "尽{jin4}": 141,
  "岗{gang1}": 142,
  "岗{gang3}": 143,
  "差{cha1}": 144,
  "差{cha4}": 145,
  "差{chai1}": 146,
  "差{ci1}": 147,
  "巷{hang4}": 148,
  "巷{xiang4}": 149,
  "帖{tie1}": 150,
  "帖{tie3}": 151,
  "帖{tie4}": 152,
  "干{gan1}": 153,
  "干{gan4}": 154,
  "应{ying1}": 155,
  "应{ying4}": 156,
  "应{ying5}": 157,
  "度{du4}": 158,
  "度{du5}": 159,
  "度{duo2}": 160,
  "弹{dan4}": 161,
  "弹{tan2}": 162,
  "弹{tan5}": 163,
  "强{jiang4}": 164,
  "强{qiang2}": 165,
  "强{qiang3}": 166,
  "当{dang1}": 167,
  "当{dang4}": 168,
  "当{dang5}": 169,
  "待{dai1}": 170,
  "待{dai4}": 171,
  "得{de2}": 172,
  "得{de5}": 173,
  "得{dei3}": 174,
  "得{dei5}": 175,
  "恶{e3}": 176,
  "恶{e4}": 177,
  "恶{wu4}": 178,
  "扁{bian3}": 179,
  "扁{pian1}": 180,
  "扇{shan1}": 181,
  "扇{shan4}": 182,
  "扎{za1}": 183,
  "扎{zha1}": 184,
  "扎{zha2}": 185,
  "扫{sao3}": 186,
  "扫{sao4}": 187,
  "担{dan1}": 188,
  "担{dan4}": 189,
  "担{dan5}": 190,
  "挑{tiao1}": 191,
  "挑{tiao3}": 192,
  "据{jv1}": 193,
  "据{jv4}": 194,
  "撒{sa1}": 195,
  "撒{sa3}": 196,
  "撒{sa5}": 197,
  "教{jiao1}": 198,
  "教{jiao4}": 199,
  "散{san3}": 200,
  "散{san4}": 201,
  "散{san5}": 202,
  "数{shu3}": 203,
  "数{shu4}": 204,
  "数{shu5}": 205,
  "斗{dou3}": 206,
  "斗{dou4}": 207,
  "晃{huang3}": 208,
  "曝{bao4}": 209,
  "曲{qu1}": 210,
  "曲{qu3}": 211,
  "更{geng1}": 212,
  "更{geng4}": 213,
  "曾{ceng1}": 214,
  "曾{ceng2}": 215,
  "曾{zeng1}": 216,
  "朝{chao2}": 217,
  "朝{zhao1}": 218,
  "朴{piao2}": 219,
  "朴{pu2}": 220,
  "朴{pu3}": 221,
  "杆{gan1}": 222,
  "杆{gan3}": 223,
  "查{cha2}": 224,
  "查{zha1}": 225,
  "校{jiao4}": 226,
  "校{xiao4}": 227,
  "模{mo2}": 228,
  "模{mu2}": 229,
  "横{heng2}": 230,
  "横{heng4}": 231,
  "没{mei2}": 232,
  "没{mo4}": 233,
  "泡{pao1}": 234,
  "泡{pao4}": 235,
  "泡{pao5}": 236,
  "济{ji3}": 237,
  "济{ji4}": 238,
  "混{hun2}": 239,
  "混{hun3}": 240,
  "混{hun4}": 241,
  "混{hun5}": 242,
  "漂{piao1}": 243,
  "漂{piao3}": 244,
  "漂{piao4}": 245,
  "炸{zha2}": 246,
  "炸{zha4}": 247,
  "熟{shou2}": 248,
  "熟{shu2}": 249,
  "燕{yan1}": 250,
  "燕{yan4}": 251,
  "片{pian1}": 252,
  "片{pian4}": 253,
  "率{lv4}": 254,
  "率{shuai4}": 255,
  "畜{chu4}": 256,
  "畜{xu4}": 257,
  "的{de5}": 258,
  "的{di1}": 259,
  "的{di2}": 260,
  "的{di4}": 261,
  "的{di5}": 262,
  "盛{cheng2}": 263,
  "盛{sheng4}": 264,
  "相{xiang1}": 265,
  "相{xiang4}": 266,
  "相{xiang5}": 267,
  "省{sheng3}": 268,
  "省{xing3}": 269,
  "看{kan1}": 270,
  "看{kan4}": 271,
  "看{kan5}": 272,
  "着{zhao1}": 273,
  "着{zhao2}": 274,
  "着{zhao5}": 275,
  "着{zhe5}": 276,
  "着{zhuo2}": 277,
  "着{zhuo5}": 278,
  "矫{jiao3}": 279,
  "禁{jin1}": 280,
  "禁{jin4}": 281,
  "种{zhong3}": 282,
  "种{zhong4}": 283,
  "称{chen4}": 284,
  "称{cheng1}": 285,
  "空{kong1}": 286,
  "空{kong4}": 287,
  "答{da1}": 288,
  "答{da2}": 289,
  "粘{nian2}": 290,
  "粘{zhan1}": 291,
  "糊{hu2}": 292,
  "糊{hu5}": 293,
  "系{ji4}": 294,
  "系{xi4}": 295,
  "系{xi5}": 296,
  "累{lei2}": 297,
  "累{lei3}": 298,
  "累{lei4}": 299,
  "累{lei5}": 300,
  "纤{qian4}": 301,
  "纤{xian1}": 302,
  "结{jie1}": 303,
  "结{jie2}": 304,
  "结{jie5}": 305,
  "给{gei3}": 306,
  "给{gei5}": 307,
  "给{ji3}": 308,
  "缝{feng2}": 309,
  "缝{feng4}": 310,
  "缝{feng5}": 311,
  "肖{xiao1}": 312,
  "肖{xiao4}": 313,
  "背{bei1}": 314,
  "背{bei4}": 315,
  "脏{zang1}": 316,
  "脏{zang4}": 317,
  "舍{she3}": 318,
  "舍{she4}": 319,
  "色{se4}": 320,
  "色{shai3}": 321,
  "落{lao4}": 322,
  "落{luo4}": 323,
  "蒙{meng1}": 324,
  "蒙{meng2}": 325,
  "蒙{meng3}": 326,
  "薄{bao2}": 327,
  "薄{bo2}": 328,
  "薄{bo4}": 329,
  "藏{cang2}": 330,
  "藏{zang4}": 331,
  "血{xie3}": 332,
  "血{xue4}": 333,
  "行{hang2}": 334,
  "行{hang5}": 335,
  "行{heng5}": 336,
  "行{xing2}": 337,
  "行{xing4}": 338,
  "要{yao1}": 339,
  "要{yao4}": 340,
  "观{guan1}": 341,
  "观{guan4}": 342,
  "觉{jiao4}": 343,
  "觉{jiao5}": 344,
  "觉{jve2}": 345,
  "角{jiao3}": 346,
  "角{jve2}": 347,
  "解{jie3}": 348,
  "解{jie4}": 349,
  "解{xie4}": 350,
  "说{shui4}": 351,
  "说{shuo1}": 352,
  "调{diao4}": 353,
  "调{tiao2}": 354,
  "踏{ta1}": 355,
  "踏{ta4}": 356,
  "车{che1}": 357,
  "车{jv1}": 358,
  "转{zhuan3}": 359,
  "转{zhuan4}": 360,
  "载{zai3}": 361,
  "载{zai4}": 362,
  "还{hai2}": 363,
  "还{huan2}": 364,
  "遂{sui2}": 365,
  "遂{sui4}": 366,
  "都{dou1}": 367,
  "都{du1}": 368,
  "重{chong2}": 369,
  "重{zhong4}": 370,
  "量{liang2}": 371,
  "量{liang4}": 372,
  "量{liang5}": 373,
  "钻{zuan1}": 374,
  "钻{zuan4}": 375,
  "铺{pu1}": 376,
  "铺{pu4}": 377,
  "长{chang2}": 378,
  "长{chang3}": 379,
  "长{zhang3}": 380,
  "间{jian1}": 381,
  "间{jian4}": 382,
  "降{jiang4}": 383,
  "降{xiang2}": 384,
  "难{nan2}": 385,
  "难{nan4}": 386,
  "难{nan5}": 387,
  "露{lou4}": 388,
  "露{lu4}": 389,
  "鲜{xian1}": 390,
  "鲜{xian3}": 391
}
diffrhythm/g2p/sources/g2p_chinese_model/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
diffrhythm/g2p/sources/pinyin_2_bpmf.txt
ADDED
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
diffrhythm/g2p/sources/pinyin_2_bpmf.txt
ADDED
+a ㄚ
+ai ㄞ
+an ㄢ
+ang ㄤ
+ao ㄠ
+ba ㄅㄚ
+bai ㄅㄞ
+ban ㄅㄢ
+bang ㄅㄤ
+bao ㄅㄠ
+bei ㄅㄟ
+ben ㄅㄣ
+beng ㄅㄥ
+bi ㄅㄧ
+bian ㄅㄧㄢ
+biang ㄅㄧㄤ
+biao ㄅㄧㄠ
+bie ㄅㄧㄝ
+bin ㄅㄧㄣ
+bing ㄅㄧㄥ
+bo ㄅㄛ
+bu ㄅㄨ
+ca ㄘㄚ
+cai ㄘㄞ
+can ㄘㄢ
+cang ㄘㄤ
+cao ㄘㄠ
+ce ㄘㄜ
+cen ㄘㄣ
+ceng ㄘㄥ
+cha ㄔㄚ
+chai ㄔㄞ
+chan ㄔㄢ
+chang ㄔㄤ
+chao ㄔㄠ
+che ㄔㄜ
+chen ㄔㄣ
+cheng ㄔㄥ
+chi ㄔ
+chong ㄔㄨㄥ
+chou ㄔㄡ
+chu ㄔㄨ
+chua ㄔㄨㄚ
+chuai ㄔㄨㄞ
+chuan ㄔㄨㄢ
+chuang ㄔㄨㄤ
+chui ㄔㄨㄟ
+chun ㄔㄨㄣ
+chuo ㄔㄨㄛ
+ci ㄘ
+cong ㄘㄨㄥ
+cou ㄘㄡ
+cu ㄘㄨ
+cuan ㄘㄨㄢ
+cui ㄘㄨㄟ
+cun ㄘㄨㄣ
+cuo ㄘㄨㄛ
+da ㄉㄚ
+dai ㄉㄞ
+dan ㄉㄢ
+dang ㄉㄤ
+dao ㄉㄠ
+de ㄉㄜ
+dei ㄉㄟ
+den ㄉㄣ
+deng ㄉㄥ
+di ㄉㄧ
+dia ㄉㄧㄚ
+dian ㄉㄧㄢ
+diao ㄉㄧㄠ
+die ㄉㄧㄝ
+din ㄉㄧㄣ
+ding ㄉㄧㄥ
+diu ㄉㄧㄡ
+dong ㄉㄨㄥ
+dou ㄉㄡ
+du ㄉㄨ
+duan ㄉㄨㄢ
+dui ㄉㄨㄟ
+dun ㄉㄨㄣ
+duo ㄉㄨㄛ
+e ㄜ
+ei ㄟ
+en ㄣ
+eng ㄥ
+er ㄦ
+fa ㄈㄚ
+fan ㄈㄢ
+fang ㄈㄤ
+fei ㄈㄟ
+fen ㄈㄣ
+feng ㄈㄥ
+fo ㄈㄛ
+fou ㄈㄡ
+fu ㄈㄨ
+ga ㄍㄚ
+gai ㄍㄞ
+gan ㄍㄢ
+gang ㄍㄤ
+gao ㄍㄠ
+ge ㄍㄜ
+gei ㄍㄟ
+gen ㄍㄣ
+geng ㄍㄥ
+gong ㄍㄨㄥ
+gou ㄍㄡ
+gu ㄍㄨ
+gua ㄍㄨㄚ
+guai ㄍㄨㄞ
+guan ㄍㄨㄢ
+guang ㄍㄨㄤ
+gui ㄍㄨㄟ
+gun ㄍㄨㄣ
+guo ㄍㄨㄛ
+ha ㄏㄚ
+hai ㄏㄞ
+han ㄏㄢ
+hang ㄏㄤ
+hao ㄏㄠ
+he ㄏㄜ
+hei ㄏㄟ
+hen ㄏㄣ
+heng ㄏㄥ
+hm ㄏㄇ
+hong ㄏㄨㄥ
+hou ㄏㄡ
+hu ㄏㄨ
+hua ㄏㄨㄚ
+huai ㄏㄨㄞ
+huan ㄏㄨㄢ
+huang ㄏㄨㄤ
+hui ㄏㄨㄟ
+hun ㄏㄨㄣ
+huo ㄏㄨㄛ
+ji ㄐㄧ
+jia ㄐㄧㄚ
+jian ㄐㄧㄢ
+jiang ㄐㄧㄤ
+jiao ㄐㄧㄠ
+jie ㄐㄧㄝ
+jin ㄐㄧㄣ
+jing ㄐㄧㄥ
+jiong ㄐㄩㄥ
+jiu ㄐㄧㄡ
+ju ㄐㄩ
+jv ㄐㄩ
+juan ㄐㄩㄢ
+jvan ㄐㄩㄢ
+jue ㄐㄩㄝ
+jve ㄐㄩㄝ
+jun ㄐㄩㄣ
+ka ㄎㄚ
+kai ㄎㄞ
+kan ㄎㄢ
+kang ㄎㄤ
+kao ㄎㄠ
+ke ㄎㄜ
+kei ㄎㄟ
+ken ㄎㄣ
+keng ㄎㄥ
+kong ㄎㄨㄥ
+kou ㄎㄡ
+ku ㄎㄨ
+kua ㄎㄨㄚ
+kuai ㄎㄨㄞ
+kuan ㄎㄨㄢ
+kuang ㄎㄨㄤ
+kui ㄎㄨㄟ
+kun ㄎㄨㄣ
+kuo ㄎㄨㄛ
+la ㄌㄚ
+lai ㄌㄞ
+lan ㄌㄢ
+lang ㄌㄤ
+lao ㄌㄠ
+le ㄌㄜ
+lei ㄌㄟ
+leng ㄌㄥ
+li ㄌㄧ
+lia ㄌㄧㄚ
+lian ㄌㄧㄢ
+liang ㄌㄧㄤ
+liao ㄌㄧㄠ
+lie ㄌㄧㄝ
+lin ㄌㄧㄣ
+ling ㄌㄧㄥ
+liu ㄌㄧㄡ
+lo ㄌㄛ
+long ㄌㄨㄥ
+lou ㄌㄡ
+lu ㄌㄨ
+luan ㄌㄨㄢ
+lue ㄌㄩㄝ
+lun ㄌㄨㄣ
+luo ㄌㄨㄛ
+lv ㄌㄩ
+lve ㄌㄩㄝ
+m ㄇㄨ
+ma ㄇㄚ
+mai ㄇㄞ
+man ㄇㄢ
+mang ㄇㄤ
+mao ㄇㄠ
+me ㄇㄜ
+mei ㄇㄟ
+men ㄇㄣ
+meng ㄇㄥ
+mi ㄇㄧ
+mian ㄇㄧㄢ
+miao ㄇㄧㄠ
+mie ㄇㄧㄝ
+min ㄇㄧㄣ
+ming ㄇㄧㄥ
+miu ㄇㄧㄡ
+mo ㄇㄛ
+mou ㄇㄡ
+mu ㄇㄨ
+n ㄣ
+na ㄋㄚ
+nai ㄋㄞ
+nan ㄋㄢ
+nang ㄋㄤ
+nao ㄋㄠ
+ne ㄋㄜ
+nei ㄋㄟ
+nen ㄋㄣ
+neng ㄋㄥ
+ng ㄣ
+ni ㄋㄧ
+nian ㄋㄧㄢ
+niang ㄋㄧㄤ
+niao ㄋㄧㄠ
+nie ㄋㄧㄝ
+nin ㄋㄧㄣ
+ning ㄋㄧㄥ
+niu ㄋㄧㄡ
+nong ㄋㄨㄥ
+nou ㄋㄡ
+nu ㄋㄨ
+nuan ㄋㄨㄢ
+nue ㄋㄩㄝ
+nun ㄋㄨㄣ
+nuo ㄋㄨㄛ
+nv ㄋㄩ
+nve ㄋㄩㄝ
+o ㄛ
+ou ㄡ
+pa ㄆㄚ
+pai ㄆㄞ
+pan ㄆㄢ
+pang ㄆㄤ
+pao ㄆㄠ
+pei ㄆㄟ
+pen ㄆㄣ
+peng ㄆㄥ
+pi ㄆㄧ
+pian ㄆㄧㄢ
+piao ㄆㄧㄠ
+pie ㄆㄧㄝ
+pin ㄆㄧㄣ
+ping ㄆㄧㄥ
+po ㄆㄛ
+pou ㄆㄡ
+pu ㄆㄨ
+qi ㄑㄧ
+qia ㄑㄧㄚ
+qian ㄑㄧㄢ
+qiang ㄑㄧㄤ
+qiao ㄑㄧㄠ
+qie ㄑㄧㄝ
+qin ㄑㄧㄣ
+qing ㄑㄧㄥ
+qiong ㄑㄩㄥ
+qiu ㄑㄧㄡ
+qu ㄑㄩ
+quan ㄑㄩㄢ
+qvan ㄑㄩㄢ
+que ㄑㄩㄝ
+qun ㄑㄩㄣ
+ran ㄖㄢ
+rang ㄖㄤ
+rao ㄖㄠ
+re ㄖㄜ
+ren ㄖㄣ
+reng ㄖㄥ
+ri ㄖ
+rong ㄖㄨㄥ
+rou ㄖㄡ
+ru ㄖㄨ
+rua ㄖㄨㄚ
+ruan ㄖㄨㄢ
+rui ㄖㄨㄟ
+run ㄖㄨㄣ
+ruo ㄖㄨㄛ
+sa ㄙㄚ
+sai ㄙㄞ
+san ㄙㄢ
+sang ㄙㄤ
+sao ㄙㄠ
+se ㄙㄜ
+sen ㄙㄣ
+seng ㄙㄥ
+sha ㄕㄚ
+shai ㄕㄞ
+shan ㄕㄢ
+shang ㄕㄤ
+shao ㄕㄠ
+she ㄕㄜ
+shei ㄕㄟ
+shen ㄕㄣ
+sheng ㄕㄥ
+shi ㄕ
+shou ㄕㄡ
+shu ㄕㄨ
+shua ㄕㄨㄚ
+shuai ㄕㄨㄞ
+shuan ㄕㄨㄢ
+shuang ㄕㄨㄤ
+shui ㄕㄨㄟ
+shun ㄕㄨㄣ
+shuo ㄕㄨㄛ
+si ㄙ
+song ㄙㄨㄥ
+sou ㄙㄡ
+su ㄙㄨ
+suan ㄙㄨㄢ
+sui ㄙㄨㄟ
+sun ㄙㄨㄣ
+suo ㄙㄨㄛ
+ta ㄊㄚ
+tai ㄊㄞ
+tan ㄊㄢ
+tang ㄊㄤ
+tao ㄊㄠ
+te ㄊㄜ
+tei ㄊㄟ
+teng ㄊㄥ
+ti ㄊㄧ
+tian ㄊㄧㄢ
+tiao ㄊㄧㄠ
+tie ㄊㄧㄝ
+ting ㄊㄧㄥ
+tong ㄊㄨㄥ
+tou ㄊㄡ
+tsuo ㄘㄨㄛ
+tu ㄊㄨ
+tuan ㄊㄨㄢ
+tui ㄊㄨㄟ
+tun ㄊㄨㄣ
+tuo ㄊㄨㄛ
+tzan ㄗㄢ
+wa ㄨㄚ
+wai ㄨㄞ
+wan ㄨㄢ
+wang ㄨㄤ
+wei ㄨㄟ
+wen ㄨㄣ
+weng ㄨㄥ
+wo ㄨㄛ
+wong ㄨㄥ
+wu ㄨ
+xi ㄒㄧ
+xia ㄒㄧㄚ
+xian ㄒㄧㄢ
+xiang ㄒㄧㄤ
+xiao ㄒㄧㄠ
+xie ㄒㄧㄝ
+xin ㄒㄧㄣ
+xing ㄒㄧㄥ
+xiong ㄒㄩㄥ
+xiu ㄒㄧㄡ
+xu ㄒㄩ
+xuan ㄒㄩㄢ
+xue ㄒㄩㄝ
+xun ㄒㄩㄣ
+ya ㄧㄚ
+yai ㄧㄞ
+yan ㄧㄢ
+yang ㄧㄤ
+yao ㄧㄠ
+ye ㄧㄝ
+yi ㄧ
+yin ㄧㄣ
+ying ㄧㄥ
+yo ㄧㄛ
+yong ㄩㄥ
+you ㄧㄡ
+yu ㄩ
+yuan ㄩㄢ
+yue ㄩㄝ
+yve ㄩㄝ
+yun ㄩㄣ
+za ㄗㄚ
+zai ㄗㄞ
+zan ㄗㄢ
+zang ㄗㄤ
+zao ㄗㄠ
+ze ㄗㄜ
+zei ㄗㄟ
+zen ㄗㄣ
+zeng ㄗㄥ
+zha ㄓㄚ
+zhai ㄓㄞ
+zhan ㄓㄢ
+zhang ㄓㄤ
+zhao ㄓㄠ
+zhe ㄓㄜ
+zhei ㄓㄟ
+zhen ㄓㄣ
+zheng ㄓㄥ
+zhi ㄓ
+zhong ㄓㄨㄥ
+zhou ㄓㄡ
+zhu ㄓㄨ
+zhua ㄓㄨㄚ
+zhuai ㄓㄨㄞ
+zhuan ㄓㄨㄢ
+zhuang ㄓㄨㄤ
+zhui ㄓㄨㄟ
+zhun ㄓㄨㄣ
+zhuo ㄓㄨㄛ
+zi ㄗ
+zong ㄗㄨㄥ
+zou ㄗㄡ
+zu ㄗㄨ
+zuan ㄗㄨㄢ
+zui ㄗㄨㄟ
+zun ㄗㄨㄣ
+zuo ㄗㄨㄛ
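
Note on the added table above: it is a plain whitespace-delimited mapping from toneless pinyin syllables to bopomofo (zhuyin) strings. The letter v serves as the ASCII stand-in for ü (hence the duplicate jv/ju, jvan/juan, lve/lue rows), and a few non-Hanyu-pinyin spellings such as tsuo and tzan are mapped as aliases. Below is a minimal sketch of how a G2P front end could consume this file; the path is taken from this diff, while the function names (load_bpmf_table, pinyin_to_bpmf) are hypothetical, not DiffRhythm's actual API.

    # Minimal sketch: load the pinyin -> bopomofo table and convert one
    # syllable. Path comes from this diff; the function names are
    # illustrative only, not the repository's real interface.
    PINYIN_2_BPMF = "diffrhythm/g2p/sources/pinyin_2_bpmf.txt"

    def load_bpmf_table(path: str) -> dict[str, str]:
        """Parse 'pinyin<space>bopomofo' lines into a lookup dict."""
        table: dict[str, str] = {}
        with open(path, encoding="utf-8") as f:
            for line in f:
                parts = line.split()  # e.g. "zhuang ㄓㄨㄤ" -> ["zhuang", "ㄓㄨㄤ"]
                if len(parts) == 2:
                    table[parts[0]] = parts[1]
        return table

    def pinyin_to_bpmf(syllable: str, table: dict[str, str]) -> str:
        """Convert one pinyin syllable to bopomofo.

        The table is toneless, so trailing tone digits are stripped
        before lookup; unknown syllables fall through unchanged.
        """
        base = syllable.rstrip("12345")
        return table.get(base, syllable)

    if __name__ == "__main__":
        table = load_bpmf_table(PINYIN_2_BPMF)
        print(pinyin_to_bpmf("zhuang4", table))  # ㄓㄨㄤ

Keeping the table toneless keeps it at one row per base syllable (429 rows); tone would be carried separately (e.g., as a digit or diacritic) and re-attached after the lookup.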
diffrhythm/g2p/utils/__pycache__/front_utils.cpython-310.pyc
ADDED
Binary file (583 Bytes).

diffrhythm/g2p/utils/__pycache__/front_utils.cpython-311.pyc
ADDED
Binary file (960 Bytes).

diffrhythm/g2p/utils/__pycache__/g2p.cpython-310.pyc
ADDED
Binary file (2.78 kB).