Princess7317 committed
Commit 3ae9fdb · verified · 1 Parent(s): 5e412c8

Upload 82 files

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +1 -0
  2. diffrhythm/.DS_Store +0 -0
  3. diffrhythm/config/defaults.ini +94 -0
  4. diffrhythm/config/diffrhythm-1b.json +13 -0
  5. diffrhythm/g2p/__pycache__/g2p_generation.cpython-310.pyc +0 -0
  6. diffrhythm/g2p/__pycache__/g2p_generation.cpython-311.pyc +0 -0
  7. diffrhythm/g2p/g2p/__init__.py +87 -0
  8. diffrhythm/g2p/g2p/__pycache__/__init__.cpython-310.pyc +0 -0
  9. diffrhythm/g2p/g2p/__pycache__/__init__.cpython-311.pyc +0 -0
  10. diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-310.pyc +0 -0
  11. diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-311.pyc +0 -0
  12. diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-310.pyc +0 -0
  13. diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-311.pyc +0 -0
  14. diffrhythm/g2p/g2p/__pycache__/english.cpython-310.pyc +0 -0
  15. diffrhythm/g2p/g2p/__pycache__/english.cpython-311.pyc +0 -0
  16. diffrhythm/g2p/g2p/__pycache__/french.cpython-310.pyc +0 -0
  17. diffrhythm/g2p/g2p/__pycache__/french.cpython-311.pyc +0 -0
  18. diffrhythm/g2p/g2p/__pycache__/german.cpython-310.pyc +0 -0
  19. diffrhythm/g2p/g2p/__pycache__/german.cpython-311.pyc +0 -0
  20. diffrhythm/g2p/g2p/__pycache__/japanese.cpython-310.pyc +0 -0
  21. diffrhythm/g2p/g2p/__pycache__/japanese.cpython-311.pyc +0 -0
  22. diffrhythm/g2p/g2p/__pycache__/korean.cpython-310.pyc +0 -0
  23. diffrhythm/g2p/g2p/__pycache__/korean.cpython-311.pyc +0 -0
  24. diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-310.pyc +0 -0
  25. diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-311.pyc +0 -0
  26. diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-310.pyc +0 -0
  27. diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-311.pyc +0 -0
  28. diffrhythm/g2p/g2p/chinese_model_g2p.py +213 -0
  29. diffrhythm/g2p/g2p/cleaners.py +31 -0
  30. diffrhythm/g2p/g2p/english.py +202 -0
  31. diffrhythm/g2p/g2p/french.py +149 -0
  32. diffrhythm/g2p/g2p/german.py +94 -0
  33. diffrhythm/g2p/g2p/japanese.py +816 -0
  34. diffrhythm/g2p/g2p/korean.py +81 -0
  35. diffrhythm/g2p/g2p/mandarin.py +600 -0
  36. diffrhythm/g2p/g2p/text_tokenizers.py +85 -0
  37. diffrhythm/g2p/g2p/vocab.json +372 -0
  38. diffrhythm/g2p/g2p_generation.py +133 -0
  39. diffrhythm/g2p/sources/bpmf_2_pinyin.txt +41 -0
  40. diffrhythm/g2p/sources/chinese_lexicon.txt +3 -0
  41. diffrhythm/g2p/sources/g2p_chinese_model/config.json +819 -0
  42. diffrhythm/g2p/sources/g2p_chinese_model/poly_bert_model.onnx +3 -0
  43. diffrhythm/g2p/sources/g2p_chinese_model/polychar.txt +159 -0
  44. diffrhythm/g2p/sources/g2p_chinese_model/polydict.json +393 -0
  45. diffrhythm/g2p/sources/g2p_chinese_model/polydict_r.json +393 -0
  46. diffrhythm/g2p/sources/g2p_chinese_model/vocab.txt +0 -0
  47. diffrhythm/g2p/sources/pinyin_2_bpmf.txt +429 -0
  48. diffrhythm/g2p/utils/__pycache__/front_utils.cpython-310.pyc +0 -0
  49. diffrhythm/g2p/utils/__pycache__/front_utils.cpython-311.pyc +0 -0
  50. diffrhythm/g2p/utils/__pycache__/g2p.cpython-310.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ diffrhythm/g2p/sources/chinese_lexicon.txt filter=lfs diff=lfs merge=lfs -text
diffrhythm/.DS_Store ADDED
Binary file (6.15 kB).
 
diffrhythm/config/defaults.ini ADDED
@@ -0,0 +1,94 @@
+
+ [DEFAULTS]
+
+ #name of the run
+ exp_name = F5
+
+ # the batch size
+ batch_size = 8
+
+ # the chunk size
+ max_frames = 3000
+ min_frames = 10
+
+ # number of CPU workers for the DataLoader
+ num_workers = 4
+
+ # the random seed
+ seed = 42
+
+ # Batches for gradient accumulation
+ accum_batches = 1
+
+ # Number of steps between checkpoints
+ checkpoint_every = 10000
+
+ # trainer checkpoint file to restart training from
+ ckpt_path = ''
+
+ # model checkpoint file to start a new training run from
+ pretrained_ckpt_path = ''
+
+ # Checkpoint path for the pretransform model if needed
+ pretransform_ckpt_path = ''
+
+ # configuration model specifying model hyperparameters
+ model_config = ''
+
+ # configuration for datasets
+ dataset_config = ''
+
+ # directory to save the checkpoints in
+ save_dir = ''
+
+ # grad norm
+ max_grad_norm = 1.0
+
+ # grad accu
+ grad_accumulation_steps = 1
+
+ # lr
+ learning_rate = 7.5e-5
+
+ # epoch
+ epochs = 110
+
+ # warmup steps
+ num_warmup_updates = 2000
+
+ # save checkpoint per steps
+ save_per_updates = 5000
+
+ # save last checkpoint per steps
+ last_per_steps = 5000
+
+ prompt_path = "/mnt/sfs/music/lance/style-lance-full|/mnt/sfs/music/lance/style-lance-cnen-music-second"
+ lrc_path = "/mnt/sfs/music/lance/lrc-lance-emb-full|/mnt/sfs/music/lance/lrc-lance-cnen-second"
+ latent_path = "/mnt/sfs/music/lance/latent-lance|/mnt/sfs/music/lance/latent-lance-cnen-music-second-1|/mnt/sfs/music/lance/latent-lance-cnen-music-second-2"
+
+ audio_drop_prob = 0.3
+ cond_drop_prob = 0.0
+ style_drop_prob = 0.1
+ lrc_drop_prob = 0.1
+
+ align_lyrics = 0
+ lyrics_slice = 0
+ parse_lyrics = 1
+ skip_empty_lyrics = 0
+ lyrics_shift = -1
+
+ use_style_prompt = 1
+
+ tokenizer_type = gpt2
+
+ reset_lr = 0
+
+ resumable_with_seed = 666
+
+ downsample_rate = 2048
+
+ grad_ckpt = 0
+
+ dataset_path = "/mnt/sfs/music/hkchen/workspace/F5-TTS-HW/filelists/music123latent_asred_bpmstyle_cnen_pure1"
+
+ pure_prob = 0.0
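
How defaults.ini is consumed is not part of this commit; as a rough sketch (assuming a plain configparser load, which may differ from the actual trainer's argument handling), the [DEFAULTS] section and the pipe-separated path lists can be read like this:

# Minimal sketch: read defaults.ini with configparser (assumed loader) and
# split the "|"-separated dataset roots used by prompt_path / lrc_path / latent_path.
import configparser

parser = configparser.ConfigParser()
parser.read("diffrhythm/config/defaults.ini")
cfg = parser["DEFAULTS"]

batch_size = cfg.getint("batch_size")          # 8
learning_rate = cfg.getfloat("learning_rate")  # 7.5e-5
latent_dirs = cfg.get("latent_path").strip('"').split("|")
print(batch_size, learning_rate, len(latent_dirs))  # 8 7.5e-05 3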
diffrhythm/config/diffrhythm-1b.json ADDED
@@ -0,0 +1,13 @@
+ {
+     "model_type": "diffrhythm",
+     "model": {
+         "dim": 2048,
+         "depth": 16,
+         "heads": 32,
+         "ff_mult": 4,
+         "text_dim": 512,
+         "conv_layers": 4,
+         "mel_dim": 64,
+         "text_num_embeds": 363
+     }
+ }
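
A hedged sketch of how this JSON might be wired into a model constructor; DiffRhythmModel is a placeholder name, since the model class itself is not included in this view:

# Sketch only: load diffrhythm-1b.json and hand the hyperparameters to a model
# constructor. DiffRhythmModel is a placeholder; the real class is not in this commit.
import json

with open("diffrhythm/config/diffrhythm-1b.json") as f:
    config = json.load(f)

assert config["model_type"] == "diffrhythm"
hparams = config["model"]  # dim=2048, depth=16, heads=32, ff_mult=4, ...
# model = DiffRhythmModel(**hparams)  # placeholder constructor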
diffrhythm/g2p/__pycache__/g2p_generation.cpython-310.pyc ADDED
Binary file (2.61 kB).

diffrhythm/g2p/__pycache__/g2p_generation.cpython-311.pyc ADDED
Binary file (4.85 kB).
 
diffrhythm/g2p/g2p/__init__.py ADDED
@@ -0,0 +1,87 @@
+ # Copyright (c) 2024 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from diffrhythm.g2p.g2p import cleaners
+ from tokenizers import Tokenizer
+ from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer
+ import LangSegment
+ import json
+ import re
+
+
+ class PhonemeBpeTokenizer:
+
+     def __init__(self, vacab_path="./diffrhythm/g2p/g2p/vocab.json"):
+         self.lang2backend = {
+             "zh": "cmn",
+             "ja": "ja",
+             "en": "en-us",
+             "fr": "fr-fr",
+             "ko": "ko",
+             "de": "de",
+         }
+         self.text_tokenizers = {}
+         self.int_text_tokenizers()
+
+         with open(vacab_path, "r") as f:
+             json_data = f.read()
+         data = json.loads(json_data)
+         self.vocab = data["vocab"]
+         LangSegment.setfilters(["en", "zh", "ja", "ko", "fr", "de"])
+
+     def int_text_tokenizers(self):
+         for key, value in self.lang2backend.items():
+             self.text_tokenizers[key] = TextTokenizer(language=value)
+
+     def tokenize(self, text, sentence, language):
+
+         # 1. convert text to phoneme
+         phonemes = []
+         if language == "auto":
+             seglist = LangSegment.getTexts(text)
+             tmp_ph = []
+             for seg in seglist:
+                 tmp_ph.append(
+                     self._clean_text(
+                         seg["text"], sentence, seg["lang"], ["cjekfd_cleaners"]
+                     )
+                 )
+             phonemes = "|_|".join(tmp_ph)
+         else:
+             phonemes = self._clean_text(text, sentence, language, ["cjekfd_cleaners"])
+         # print('clean text: ', phonemes)
+
+         # 2. tokenize phonemes
+         phoneme_tokens = self.phoneme2token(phonemes)
+         # print('encode: ', phoneme_tokens)
+
+         # # 3. decode tokens [optional]
+         # decoded_text = self.tokenizer.decode(phoneme_tokens)
+         # print('decoded: ', decoded_text)
+
+         return phonemes, phoneme_tokens
+
+     def _clean_text(self, text, sentence, language, cleaner_names):
+         for name in cleaner_names:
+             cleaner = getattr(cleaners, name)
+             if not cleaner:
+                 raise Exception("Unknown cleaner: %s" % name)
+             text = cleaner(text, sentence, language, self.text_tokenizers)
+         return text
+
+     def phoneme2token(self, phonemes):
+         tokens = []
+         if isinstance(phonemes, list):
+             for phone in phonemes:
+                 phone = phone.split("\t")[0]
+                 phonemes_split = phone.split("|")
+                 tokens.append(
+                     [self.vocab[p] for p in phonemes_split if p in self.vocab]
+                 )
+         else:
+             phonemes = phonemes.split("\t")[0]
+             phonemes_split = phonemes.split("|")
+             tokens = [self.vocab[p] for p in phonemes_split if p in self.vocab]
+         return tokens
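
Usage sketch for the tokenizer defined above (assumes espeak-ng plus the LangSegment and tokenizers packages are installed; the example line and printed values are illustrative):

# Sketch: run the G2P front end on one line of text.
from diffrhythm.g2p.g2p import PhonemeBpeTokenizer

tokenizer = PhonemeBpeTokenizer(vacab_path="./diffrhythm/g2p/g2p/vocab.json")
# `sentence` is only used by the Chinese cleaner; for English it can simply repeat `text`.
text = "Hello world"
phonemes, tokens = tokenizer.tokenize(text, text, "en")
print(phonemes)  # "|"-separated IPA phoneme string
print(tokens)    # vocab ids for the phonemes found in vocab.json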
diffrhythm/g2p/g2p/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (2.62 kB).

diffrhythm/g2p/g2p/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (4.6 kB).

diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-310.pyc ADDED
Binary file (6.87 kB).

diffrhythm/g2p/g2p/__pycache__/chinese_model_g2p.cpython-311.pyc ADDED
Binary file (13.1 kB).

diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-310.pyc ADDED
Binary file (950 Bytes).

diffrhythm/g2p/g2p/__pycache__/cleaners.cpython-311.pyc ADDED
Binary file (1.5 kB).

diffrhythm/g2p/g2p/__pycache__/english.cpython-310.pyc ADDED
Binary file (4.93 kB).

diffrhythm/g2p/g2p/__pycache__/english.cpython-311.pyc ADDED
Binary file (9.27 kB).

diffrhythm/g2p/g2p/__pycache__/french.cpython-310.pyc ADDED
Binary file (3.66 kB).

diffrhythm/g2p/g2p/__pycache__/french.cpython-311.pyc ADDED
Binary file (5.74 kB).

diffrhythm/g2p/g2p/__pycache__/german.cpython-310.pyc ADDED
Binary file (2.45 kB).

diffrhythm/g2p/g2p/__pycache__/german.cpython-311.pyc ADDED
Binary file (4.03 kB).

diffrhythm/g2p/g2p/__pycache__/japanese.cpython-310.pyc ADDED
Binary file (17.7 kB).

diffrhythm/g2p/g2p/__pycache__/japanese.cpython-311.pyc ADDED
Binary file (28.1 kB).

diffrhythm/g2p/g2p/__pycache__/korean.cpython-310.pyc ADDED
Binary file (1.94 kB).

diffrhythm/g2p/g2p/__pycache__/korean.cpython-311.pyc ADDED
Binary file (2.91 kB).

diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-310.pyc ADDED
Binary file (12.6 kB).

diffrhythm/g2p/g2p/__pycache__/mandarin.cpython-311.pyc ADDED
Binary file (25.2 kB).

diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-310.pyc ADDED
Binary file (2.67 kB).

diffrhythm/g2p/g2p/__pycache__/text_tokenizers.cpython-311.pyc ADDED
Binary file (4.78 kB).
 
diffrhythm/g2p/g2p/chinese_model_g2p.py ADDED
@@ -0,0 +1,213 @@
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import os
7
+ import numpy as np
8
+ import torch
9
+ from torch.utils.data import DataLoader
10
+ import json
11
+ from transformers import BertTokenizer
12
+ from torch.utils.data import Dataset
13
+ from transformers.models.bert.modeling_bert import *
14
+ import torch
15
+ import torch.nn.functional as F
16
+ from onnxruntime import InferenceSession, GraphOptimizationLevel, SessionOptions
17
+
18
+
19
+ class PolyDataset(Dataset):
20
+ def __init__(self, words, labels, word_pad_idx=0, label_pad_idx=-1):
21
+ self.dataset = self.preprocess(words, labels)
22
+ self.word_pad_idx = word_pad_idx
23
+ self.label_pad_idx = label_pad_idx
24
+
25
+ def preprocess(self, origin_sentences, origin_labels):
26
+ """
27
+ Maps tokens and tags to their indices and stores them in the dict data.
28
+ examples:
29
+ word:['[CLS]', '浙', '商', '银', '行', '企', '业', '信', '贷', '部']
30
+ sentence:([101, 3851, 1555, 7213, 6121, 821, 689, 928, 6587, 6956],
31
+ array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
32
+ label:[3, 13, 13, 13, 0, 0, 0, 0, 0]
33
+ """
34
+ data = []
35
+ labels = []
36
+ sentences = []
37
+ # tokenize
38
+ for line in origin_sentences:
39
+ # replace each token by its index
40
+ # we can not use encode_plus because our sentences are aligned to labels in list type
41
+ words = []
42
+ word_lens = []
43
+ for token in line:
44
+ words.append(token)
45
+ word_lens.append(1)
46
+ token_start_idxs = 1 + np.cumsum([0] + word_lens[:-1])
47
+ sentences.append(((words, token_start_idxs), 0))
48
+ ###
49
+ for tag in origin_labels:
50
+ labels.append(tag)
51
+
52
+ for sentence, label in zip(sentences, labels):
53
+ data.append((sentence, label))
54
+ return data
55
+
56
+ def __getitem__(self, idx):
57
+ """sample data to get batch"""
58
+ word = self.dataset[idx][0]
59
+ label = self.dataset[idx][1]
60
+ return [word, label]
61
+
62
+ def __len__(self):
63
+ """get dataset size"""
64
+ return len(self.dataset)
65
+
66
+ def collate_fn(self, batch):
67
+
68
+ sentences = [x[0][0] for x in batch]
69
+ ori_sents = [x[0][1] for x in batch]
70
+ labels = [x[1] for x in batch]
71
+ batch_len = len(sentences)
72
+
73
+ # compute length of longest sentence in batch
74
+ max_len = max([len(s[0]) for s in sentences])
75
+ max_label_len = 0
76
+ batch_data = np.ones((batch_len, max_len))
77
+ batch_label_starts = []
78
+
79
+ # padding and aligning
80
+ for j in range(batch_len):
81
+ cur_len = len(sentences[j][0])
82
+ batch_data[j][:cur_len] = sentences[j][0]
83
+ label_start_idx = sentences[j][-1]
84
+ label_starts = np.zeros(max_len)
85
+ label_starts[[idx for idx in label_start_idx if idx < max_len]] = 1
86
+ batch_label_starts.append(label_starts)
87
+ max_label_len = max(int(sum(label_starts)), max_label_len)
88
+
89
+ # padding label
90
+ batch_labels = self.label_pad_idx * np.ones((batch_len, max_label_len))
91
+ batch_pmasks = self.label_pad_idx * np.ones((batch_len, max_label_len))
92
+ for j in range(batch_len):
93
+ cur_tags_len = len(labels[j])
94
+ batch_labels[j][:cur_tags_len] = labels[j]
95
+ batch_pmasks[j][:cur_tags_len] = [
96
+ 1 if item > 0 else 0 for item in labels[j]
97
+ ]
98
+
99
+ # convert data to torch LongTensors
100
+ batch_data = torch.tensor(batch_data, dtype=torch.long)
101
+ batch_label_starts = torch.tensor(batch_label_starts, dtype=torch.long)
102
+ batch_labels = torch.tensor(batch_labels, dtype=torch.long)
103
+ batch_pmasks = torch.tensor(batch_pmasks, dtype=torch.long)
104
+ return [batch_data, batch_label_starts, batch_labels, batch_pmasks, ori_sents]
105
+
106
+
107
+ class BertPolyPredict:
108
+ def __init__(self, bert_model, jsonr_file, json_file):
109
+ self.tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=True)
110
+ with open(jsonr_file, "r", encoding="utf8") as fp:
111
+ self.pron_dict = json.load(fp)
112
+ with open(json_file, "r", encoding="utf8") as fp:
113
+ self.pron_dict_id_2_pinyin = json.load(fp)
114
+ self.num_polyphone = len(self.pron_dict)
115
+ self.device = "cpu"
116
+ self.polydataset = PolyDataset
117
+ options = SessionOptions() # initialize session options
118
+ options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
119
+ print(os.path.join(bert_model, "poly_bert_model.onnx"))
120
+ self.session = InferenceSession(
121
+ os.path.join(bert_model, "poly_bert_model.onnx"),
122
+ sess_options=options,
123
+ providers=[
124
+ "CoreMLExecutionProvider", # Replace CUDA with CoreML
125
+ "CPUExecutionProvider",
126
+ ], # CPUExecutionProvider #CUDAExecutionProvider
127
+ )
128
+ # self.session.set_providers(['CUDAExecutionProvider', "CPUExecutionProvider"], [ {'device_id': 0}])
129
+
130
+ # disable session.run() fallback mechanism, it prevents for a reset of the execution provider
131
+ self.session.disable_fallback()
132
+
133
+ def predict_process(self, txt_list):
134
+ word_test, label_test, texts_test = self.get_examples_po(txt_list)
135
+ data = self.polydataset(word_test, label_test)
136
+ predict_loader = DataLoader(
137
+ data, batch_size=1, shuffle=False, collate_fn=data.collate_fn
138
+ )
139
+ pred_tags = self.predict_onnx(predict_loader)
140
+ return pred_tags
141
+
142
+ def predict_onnx(self, dev_loader):
143
+ pred_tags = []
144
+ with torch.no_grad():
145
+ for idx, batch_samples in enumerate(dev_loader):
146
+ # [batch_data, batch_label_starts, batch_labels, batch_pmasks, ori_sents]
147
+ batch_data, batch_label_starts, batch_labels, batch_pmasks, _ = (
148
+ batch_samples
149
+ )
150
+ # shift tensors to GPU if available
151
+ batch_data = batch_data.to(self.device)
152
+ batch_label_starts = batch_label_starts.to(self.device)
153
+ batch_labels = batch_labels.to(self.device)
154
+ batch_pmasks = batch_pmasks.to(self.device)
155
+ batch_data = np.asarray(batch_data, dtype=np.float32)
156
+ batch_pmasks = np.asarray(batch_pmasks, dtype=np.float32)
157
+ # batch_output = self.session.run(output_names=['outputs'], input_feed={"input_ids":batch_data, "input_pmasks": batch_pmasks})[0][0]
158
+ batch_output = self.session.run(
159
+ output_names=["outputs"], input_feed={"input_ids": batch_data}
160
+ )[0]
161
+ label_masks = batch_pmasks == 1
162
+ batch_labels = batch_labels.to("cpu").numpy()
163
+ for i, indices in enumerate(np.argmax(batch_output, axis=2)):
164
+ for j, idx in enumerate(indices):
165
+ if label_masks[i][j]:
166
+ # pred_tag.append(idx)
167
+ pred_tags.append(self.pron_dict_id_2_pinyin[str(idx + 1)])
168
+ return pred_tags
169
+
170
+ def get_examples_po(self, text_list):
171
+
172
+ word_list = []
173
+ label_list = []
174
+ sentence_list = []
175
+ id = 0
176
+ for line in [text_list]:
177
+ sentence = line[0]
178
+ words = []
179
+ tokens = line[0]
180
+ index = line[-1]
181
+ front = index
182
+ back = len(tokens) - index - 1
183
+ labels = [0] * front + [1] + [0] * back
184
+ words = ["[CLS]"] + [item for item in sentence]
185
+ words = self.tokenizer.convert_tokens_to_ids(words)
186
+ word_list.append(words)
187
+ label_list.append(labels)
188
+ sentence_list.append(sentence)
189
+
190
+ id += 1
191
+ # mask_list.append(masks)
192
+ assert len(labels) + 1 == len(words), print(
193
+ (
194
+ poly,
195
+ sentence,
196
+ words,
197
+ labels,
198
+ sentence,
199
+ len(sentence),
200
+ len(words),
201
+ len(labels),
202
+ )
203
+ )
204
+ assert len(labels) + 1 == len(
205
+ words
206
+ ), "Number of labels does not match number of words"
207
+ assert len(labels) == len(
208
+ sentence
209
+ ), "Number of labels does not match number of sentences"
210
+ assert len(word_list) == len(
211
+ label_list
212
+ ), "Number of label sentences does not match number of word sentences"
213
+ return word_list, label_list, text_list
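
chinese_model_g2p.py wraps an ONNX polyphone classifier; a usage sketch follows. The model directory matches diffrhythm/g2p/sources/g2p_chinese_model/ from this commit, but which of polydict.json / polydict_r.json fills which constructor argument is a guess here — g2p_generation.py in the same commit shows the real wiring.

# Sketch: disambiguate one polyphonic character with the ONNX model added under
# diffrhythm/g2p/sources/g2p_chinese_model/. The dictionary-argument order below
# is an assumption (see g2p_generation.py for the actual call).
from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict

model_dir = "./diffrhythm/g2p/sources/g2p_chinese_model"
predictor = BertPolyPredict(
    model_dir,
    f"{model_dir}/polydict_r.json",  # assumed: pinyin -> id
    f"{model_dir}/polydict.json",    # assumed: id -> pinyin
)
chars = list("我们还没还钱")  # "还" is polyphonic
pred = predictor.predict_process([chars, 2])  # 2 = index of the character to resolve
print(pred)  # e.g. a one-element list with the predicted pinyin (illustrative)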
diffrhythm/g2p/g2p/cleaners.py ADDED
@@ -0,0 +1,31 @@
+ # Copyright (c) 2024 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import re
+ from diffrhythm.g2p.g2p.japanese import japanese_to_ipa
+ from diffrhythm.g2p.g2p.mandarin import chinese_to_ipa
+ from diffrhythm.g2p.g2p.english import english_to_ipa
+ from diffrhythm.g2p.g2p.french import french_to_ipa
+ from diffrhythm.g2p.g2p.korean import korean_to_ipa
+ from diffrhythm.g2p.g2p.german import german_to_ipa
+
+
+ def cjekfd_cleaners(text, sentence, language, text_tokenizers):
+
+     if language == "zh":
+         return chinese_to_ipa(text, sentence, text_tokenizers["zh"])
+     elif language == "ja":
+         return japanese_to_ipa(text, text_tokenizers["ja"])
+     elif language == "en":
+         return english_to_ipa(text, text_tokenizers["en"])
+     elif language == "fr":
+         return french_to_ipa(text, text_tokenizers["fr"])
+     elif language == "ko":
+         return korean_to_ipa(text, text_tokenizers["ko"])
+     elif language == "de":
+         return german_to_ipa(text, text_tokenizers["de"])
+     else:
+         raise Exception("Unknown language: %s" % language)
+     return None
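
cjekfd_cleaners is the language dispatch used by PhonemeBpeTokenizer._clean_text; it can also be called directly, e.g. (assuming an espeak-backed TextTokenizer for English):

# Sketch: call the dispatcher directly with a prebuilt tokenizer map.
from diffrhythm.g2p.g2p.cleaners import cjekfd_cleaners
from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer

text_tokenizers = {"en": TextTokenizer(language="en-us")}
ipa = cjekfd_cleaners("It costs $5.", "It costs $5.", "en", text_tokenizers)
print(ipa)  # "|"-separated IPA phonemes produced by english_to_ipa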
diffrhythm/g2p/g2p/english.py ADDED
@@ -0,0 +1,202 @@
+ # Copyright (c) 2024 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import re
+ from unidecode import unidecode
+ import inflect
+
+ """
+ Text clean time
+ """
+ _inflect = inflect.engine()
+ _comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
+ _decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
+ _percent_number_re = re.compile(r"([0-9\.\,]*[0-9]+%)")
+ _pounds_re = re.compile(r"£([0-9\,]*[0-9]+)")
+ _dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
+ _fraction_re = re.compile(r"([0-9]+)/([0-9]+)")
+ _ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
+ _number_re = re.compile(r"[0-9]+")
+
+ # List of (regular expression, replacement) pairs for abbreviations:
+ _abbreviations = [
+     (re.compile("\\b%s\\b" % x[0], re.IGNORECASE), x[1])
+     for x in [
+         ("mrs", "misess"),
+         ("mr", "mister"),
+         ("dr", "doctor"),
+         ("st", "saint"),
+         ("co", "company"),
+         ("jr", "junior"),
+         ("maj", "major"),
+         ("gen", "general"),
+         ("drs", "doctors"),
+         ("rev", "reverend"),
+         ("lt", "lieutenant"),
+         ("hon", "honorable"),
+         ("sgt", "sergeant"),
+         ("capt", "captain"),
+         ("esq", "esquire"),
+         ("ltd", "limited"),
+         ("col", "colonel"),
+         ("ft", "fort"),
+         ("etc", "et cetera"),
+         ("btw", "by the way"),
+     ]
+ ]
+
+ _special_map = [
+     ("t|ɹ", "tɹ"),
+     ("d|ɹ", "dɹ"),
+     ("t|s", "ts"),
+     ("d|z", "dz"),
+     ("ɪ|ɹ", "ɪɹ"),
+     ("ɐ", "ɚ"),
+     ("ᵻ", "ɪ"),
+     ("əl", "l"),
+     ("x", "k"),
+     ("ɬ", "l"),
+     ("ʔ", "t"),
+     ("n̩", "n"),
+     ("oː|ɹ", "oːɹ"),
+ ]
+
+
+ def expand_abbreviations(text):
+     for regex, replacement in _abbreviations:
+         text = re.sub(regex, replacement, text)
+     return text
+
+
+ def _remove_commas(m):
+     return m.group(1).replace(",", "")
+
+
+ def _expand_decimal_point(m):
+     return m.group(1).replace(".", " point ")
+
+
+ def _expand_percent(m):
+     return m.group(1).replace("%", " percent ")
+
+
+ def _expand_dollars(m):
+     match = m.group(1)
+     parts = match.split(".")
+     if len(parts) > 2:
+         return " " + match + " dollars "  # Unexpected format
+     dollars = int(parts[0]) if parts[0] else 0
+     cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
+     if dollars and cents:
+         dollar_unit = "dollar" if dollars == 1 else "dollars"
+         cent_unit = "cent" if cents == 1 else "cents"
+         return " %s %s, %s %s " % (dollars, dollar_unit, cents, cent_unit)
+     elif dollars:
+         dollar_unit = "dollar" if dollars == 1 else "dollars"
+         return " %s %s " % (dollars, dollar_unit)
+     elif cents:
+         cent_unit = "cent" if cents == 1 else "cents"
+         return " %s %s " % (cents, cent_unit)
+     else:
+         return " zero dollars "
+
+
+ def fraction_to_words(numerator, denominator):
+     if numerator == 1 and denominator == 2:
+         return " one half "
+     if numerator == 1 and denominator == 4:
+         return " one quarter "
+     if denominator == 2:
+         return " " + _inflect.number_to_words(numerator) + " halves "
+     if denominator == 4:
+         return " " + _inflect.number_to_words(numerator) + " quarters "
+     return (
+         " "
+         + _inflect.number_to_words(numerator)
+         + " "
+         + _inflect.ordinal(_inflect.number_to_words(denominator))
+         + " "
+     )
+
+
+ def _expand_fraction(m):
+     numerator = int(m.group(1))
+     denominator = int(m.group(2))
+     return fraction_to_words(numerator, denominator)
+
+
+ def _expand_ordinal(m):
+     return " " + _inflect.number_to_words(m.group(0)) + " "
+
+
+ def _expand_number(m):
+     num = int(m.group(0))
+     if num > 1000 and num < 3000:
+         if num == 2000:
+             return " two thousand "
+         elif num > 2000 and num < 2010:
+             return " two thousand " + _inflect.number_to_words(num % 100) + " "
+         elif num % 100 == 0:
+             return " " + _inflect.number_to_words(num // 100) + " hundred "
+         else:
+             return (
+                 " "
+                 + _inflect.number_to_words(num, andword="", zero="oh", group=2).replace(
+                     ", ", " "
+                 )
+                 + " "
+             )
+     else:
+         return " " + _inflect.number_to_words(num, andword="") + " "
+
+
+ # Normalize numbers pronunciation
+ def normalize_numbers(text):
+     text = re.sub(_comma_number_re, _remove_commas, text)
+     text = re.sub(_pounds_re, r"\1 pounds", text)
+     text = re.sub(_dollars_re, _expand_dollars, text)
+     text = re.sub(_fraction_re, _expand_fraction, text)
+     text = re.sub(_decimal_number_re, _expand_decimal_point, text)
+     text = re.sub(_percent_number_re, _expand_percent, text)
+     text = re.sub(_ordinal_re, _expand_ordinal, text)
+     text = re.sub(_number_re, _expand_number, text)
+     return text
+
+
+ def _english_to_ipa(text):
+     # text = unidecode(text).lower()
+     text = expand_abbreviations(text)
+     text = normalize_numbers(text)
+     return text
+
+
+ # special map
+ def special_map(text):
+     for regex, replacement in _special_map:
+         regex = regex.replace("|", "\|")
+         while re.search(r"(^|[_|]){}([_|]|$)".format(regex), text):
+             text = re.sub(
+                 r"(^|[_|]){}([_|]|$)".format(regex), r"\1{}\2".format(replacement), text
+             )
+     # text = re.sub(r'([,.!?])', r'|\1', text)
+     return text
+
+
+ # Add some special operation
+ def english_to_ipa(text, text_tokenizer):
+     if type(text) == str:
+         text = _english_to_ipa(text)
+     else:
+         text = [_english_to_ipa(t) for t in text]
+     phonemes = text_tokenizer(text)
+     if phonemes[-1] in "p⁼ʰmftnlkxʃs`ɹaoəɛɪeɑʊŋiuɥwæjː":
+         phonemes += "|_"
+     if type(text) == str:
+         return special_map(phonemes)
+     else:
+         result_ph = []
+         for phone in phonemes:
+             result_ph.append(special_map(phone))
+         return result_ph
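
The number and abbreviation rules above run before phonemization; an illustrative call (output shown approximately — the rules deliberately insert extra spaces around expanded numbers):

# Illustrative calls to the normalization helpers above.
from diffrhythm.g2p.g2p.english import expand_abbreviations, normalize_numbers

print(expand_abbreviations("Dr. Smith, etc."))
# -> "doctor. Smith, et cetera."
print(normalize_numbers("It cost $2.50 in 2007."))
# -> roughly "It cost two dollars, fifty cents in two thousand seven ."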
diffrhythm/g2p/g2p/french.py ADDED
@@ -0,0 +1,149 @@
+ # Copyright (c) 2024 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import re
+
+ """
+ Text clean time
+ """
+ # List of (regular expression, replacement) pairs for abbreviations in french:
+ _abbreviations = [
+     (re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1])
+     for x in [
+         ("M", "monsieur"),
+         ("Mlle", "mademoiselle"),
+         ("Mlles", "mesdemoiselles"),
+         ("Mme", "Madame"),
+         ("Mmes", "Mesdames"),
+         ("N.B", "nota bene"),
+         ("M", "monsieur"),
+         ("p.c.q", "parce que"),
+         ("Pr", "professeur"),
+         ("qqch", "quelque chose"),
+         ("rdv", "rendez-vous"),
+         ("max", "maximum"),
+         ("min", "minimum"),
+         ("no", "numéro"),
+         ("adr", "adresse"),
+         ("dr", "docteur"),
+         ("st", "saint"),
+         ("co", "companie"),
+         ("jr", "junior"),
+         ("sgt", "sergent"),
+         ("capt", "capitain"),
+         ("col", "colonel"),
+         ("av", "avenue"),
+         ("av. J.-C", "avant Jésus-Christ"),
+         ("apr. J.-C", "après Jésus-Christ"),
+         ("art", "article"),
+         ("boul", "boulevard"),
+         ("c.-à-d", "c’est-à-dire"),
+         ("etc", "et cetera"),
+         ("ex", "exemple"),
+         ("excl", "exclusivement"),
+         ("boul", "boulevard"),
+     ]
+ ] + [
+     (re.compile("\\b%s" % x[0]), x[1])
+     for x in [
+         ("Mlle", "mademoiselle"),
+         ("Mlles", "mesdemoiselles"),
+         ("Mme", "Madame"),
+         ("Mmes", "Mesdames"),
+     ]
+ ]
+
+ rep_map = {
+     "：": ",",
+     "；": ",",
+     "，": ",",
+     "。": ".",
+     "！": "!",
+     "？": "?",
+     "\n": ".",
+     "·": ",",
+     "、": ",",
+     "...": ".",
+     "…": ".",
+     "$": ".",
+     "“": "",
+     "”": "",
+     "‘": "",
+     "’": "",
+     "(": "",
+     ")": "",
+     "（": "",
+     "）": "",
+     "《": "",
+     "》": "",
+     "【": "",
+     "】": "",
+     "[": "",
+     "]": "",
+     "—": "",
+     "～": "-",
+     "~": "-",
+     "「": "",
+     "」": "",
+     "¿": "",
+     "¡": "",
+ }
+
+
+ def collapse_whitespace(text):
+     # Regular expression matching whitespace:
+     _whitespace_re = re.compile(r"\s+")
+     return re.sub(_whitespace_re, " ", text).strip()
+
+
+ def remove_punctuation_at_begin(text):
+     return re.sub(r"^[,.!?]+", "", text)
+
+
+ def remove_aux_symbols(text):
+     text = re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text)
+     return text
+
+
+ def replace_symbols(text):
+     text = text.replace(";", ",")
+     text = text.replace("-", " ")
+     text = text.replace(":", ",")
+     text = text.replace("&", " et ")
+     return text
+
+
+ def expand_abbreviations(text):
+     for regex, replacement in _abbreviations:
+         text = re.sub(regex, replacement, text)
+     return text
+
+
+ def replace_punctuation(text):
+     pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
+     replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
+     return replaced_text
+
+
+ def text_normalize(text):
+     text = expand_abbreviations(text)
+     text = replace_punctuation(text)
+     text = replace_symbols(text)
+     text = remove_aux_symbols(text)
+     text = remove_punctuation_at_begin(text)
+     text = collapse_whitespace(text)
+     text = re.sub(r"([^\.,!\?\-…])$", r"\1", text)
+     return text
+
+
+ def french_to_ipa(text, text_tokenizer):
+     if type(text) == str:
+         text = text_normalize(text)
+         phonemes = text_tokenizer(text)
+         return phonemes
+     else:
+         for i, t in enumerate(text):
+             text[i] = text_normalize(t)
+         return text_tokenizer(text)
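
A quick illustration of the French normalization above (expected output follows from the abbreviation and symbol rules; exact spacing may vary):

# Sketch: French normalization before phonemization.
from diffrhythm.g2p.g2p.french import text_normalize

print(text_normalize("M. Dupont & Mme Martin, rdv. boul. Saint-Michel !"))
# -> roughly "monsieur Dupont et Madame Martin, rendez vous boulevard Saint Michel !"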
diffrhythm/g2p/g2p/german.py ADDED
@@ -0,0 +1,94 @@
+ # Copyright (c) 2024 Amphion.
+ #
+ # This source code is licensed under the MIT license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ import re
+
+ """
+ Text clean time
+ """
+ rep_map = {
+     "：": ",",
+     "；": ",",
+     "，": ",",
+     "。": ".",
+     "！": "!",
+     "？": "?",
+     "\n": ".",
+     "·": ",",
+     "、": ",",
+     "...": ".",
+     "…": ".",
+     "$": ".",
+     "“": "",
+     "”": "",
+     "‘": "",
+     "’": "",
+     "(": "",
+     ")": "",
+     "（": "",
+     "）": "",
+     "《": "",
+     "》": "",
+     "【": "",
+     "】": "",
+     "[": "",
+     "]": "",
+     "—": "",
+     "～": "-",
+     "~": "-",
+     "「": "",
+     "」": "",
+     "¿": "",
+     "¡": "",
+ }
+
+
+ def collapse_whitespace(text):
+     # Regular expression matching whitespace:
+     _whitespace_re = re.compile(r"\s+")
+     return re.sub(_whitespace_re, " ", text).strip()
+
+
+ def remove_punctuation_at_begin(text):
+     return re.sub(r"^[,.!?]+", "", text)
+
+
+ def remove_aux_symbols(text):
+     text = re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text)
+     return text
+
+
+ def replace_symbols(text):
+     text = text.replace(";", ",")
+     text = text.replace("-", " ")
+     text = text.replace(":", ",")
+     return text
+
+
+ def replace_punctuation(text):
+     pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
+     replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
+     return replaced_text
+
+
+ def text_normalize(text):
+     text = replace_punctuation(text)
+     text = replace_symbols(text)
+     text = remove_aux_symbols(text)
+     text = remove_punctuation_at_begin(text)
+     text = collapse_whitespace(text)
+     text = re.sub(r"([^\.,!\?\-…])$", r"\1", text)
+     return text
+
+
+ def german_to_ipa(text, text_tokenizer):
+     if type(text) == str:
+         text = text_normalize(text)
+         phonemes = text_tokenizer(text)
+         return phonemes
+     else:
+         for i, t in enumerate(text):
+             text[i] = text_normalize(t)
+         return text_tokenizer(text)
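
german_to_ipa follows the same normalize-then-phonemize pattern as the French module, minus abbreviation expansion; a minimal sketch assuming the espeak "de" backend is available:

# Sketch: the German path, mirroring french.py without abbreviation handling.
from diffrhythm.g2p.g2p.german import german_to_ipa
from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer

print(german_to_ipa("Guten Morgen, wie geht es dir?", TextTokenizer(language="de")))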
diffrhythm/g2p/g2p/japanese.py ADDED
@@ -0,0 +1,816 @@
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import io, re, os, sys, time, argparse, pdb, json
7
+ from io import StringIO
8
+ from typing import Optional
9
+ import numpy as np
10
+ import traceback
11
+ import pyopenjtalk
12
+ from pykakasi import kakasi
13
+
14
+ punctuation = [",", ".", "!", "?", ":", ";", "'", "…"]
15
+
16
+ jp_xphone2ipa = [
17
+ " a a",
18
+ " i i",
19
+ " u ɯ",
20
+ " e e",
21
+ " o o",
22
+ " a: aː",
23
+ " i: iː",
24
+ " u: ɯː",
25
+ " e: eː",
26
+ " o: oː",
27
+ " k k",
28
+ " s s",
29
+ " t t",
30
+ " n n",
31
+ " h ç",
32
+ " f ɸ",
33
+ " m m",
34
+ " y j",
35
+ " r ɾ",
36
+ " w ɰᵝ",
37
+ " N ɴ",
38
+ " g g",
39
+ " j d ʑ",
40
+ " z z",
41
+ " d d",
42
+ " b b",
43
+ " p p",
44
+ " q q",
45
+ " v v",
46
+ " : :",
47
+ " by b j",
48
+ " ch t ɕ",
49
+ " dy d e j",
50
+ " ty t e j",
51
+ " gy g j",
52
+ " gw g ɯ",
53
+ " hy ç j",
54
+ " ky k j",
55
+ " kw k ɯ",
56
+ " my m j",
57
+ " ny n j",
58
+ " py p j",
59
+ " ry ɾ j",
60
+ " sh ɕ",
61
+ " ts t s ɯ",
62
+ ]
63
+
64
+ _mora_list_minimum: list[tuple[str, Optional[str], str]] = [
65
+ ("ヴォ", "v", "o"),
66
+ ("ヴェ", "v", "e"),
67
+ ("ヴィ", "v", "i"),
68
+ ("ヴァ", "v", "a"),
69
+ ("ヴ", "v", "u"),
70
+ ("ン", None, "N"),
71
+ ("ワ", "w", "a"),
72
+ ("ロ", "r", "o"),
73
+ ("レ", "r", "e"),
74
+ ("ル", "r", "u"),
75
+ ("リョ", "ry", "o"),
76
+ ("リュ", "ry", "u"),
77
+ ("リャ", "ry", "a"),
78
+ ("リェ", "ry", "e"),
79
+ ("リ", "r", "i"),
80
+ ("ラ", "r", "a"),
81
+ ("ヨ", "y", "o"),
82
+ ("ユ", "y", "u"),
83
+ ("ヤ", "y", "a"),
84
+ ("モ", "m", "o"),
85
+ ("メ", "m", "e"),
86
+ ("ム", "m", "u"),
87
+ ("ミョ", "my", "o"),
88
+ ("ミュ", "my", "u"),
89
+ ("ミャ", "my", "a"),
90
+ ("ミェ", "my", "e"),
91
+ ("ミ", "m", "i"),
92
+ ("マ", "m", "a"),
93
+ ("ポ", "p", "o"),
94
+ ("ボ", "b", "o"),
95
+ ("ホ", "h", "o"),
96
+ ("ペ", "p", "e"),
97
+ ("ベ", "b", "e"),
98
+ ("ヘ", "h", "e"),
99
+ ("プ", "p", "u"),
100
+ ("ブ", "b", "u"),
101
+ ("フォ", "f", "o"),
102
+ ("フェ", "f", "e"),
103
+ ("フィ", "f", "i"),
104
+ ("ファ", "f", "a"),
105
+ ("フ", "f", "u"),
106
+ ("ピョ", "py", "o"),
107
+ ("ピュ", "py", "u"),
108
+ ("ピャ", "py", "a"),
109
+ ("ピェ", "py", "e"),
110
+ ("ピ", "p", "i"),
111
+ ("ビョ", "by", "o"),
112
+ ("ビュ", "by", "u"),
113
+ ("ビャ", "by", "a"),
114
+ ("ビェ", "by", "e"),
115
+ ("ビ", "b", "i"),
116
+ ("ヒョ", "hy", "o"),
117
+ ("ヒュ", "hy", "u"),
118
+ ("ヒャ", "hy", "a"),
119
+ ("ヒェ", "hy", "e"),
120
+ ("ヒ", "h", "i"),
121
+ ("パ", "p", "a"),
122
+ ("バ", "b", "a"),
123
+ ("ハ", "h", "a"),
124
+ ("ノ", "n", "o"),
125
+ ("ネ", "n", "e"),
126
+ ("ヌ", "n", "u"),
127
+ ("ニョ", "ny", "o"),
128
+ ("ニュ", "ny", "u"),
129
+ ("ニャ", "ny", "a"),
130
+ ("ニェ", "ny", "e"),
131
+ ("ニ", "n", "i"),
132
+ ("ナ", "n", "a"),
133
+ ("ドゥ", "d", "u"),
134
+ ("ド", "d", "o"),
135
+ ("トゥ", "t", "u"),
136
+ ("ト", "t", "o"),
137
+ ("デョ", "dy", "o"),
138
+ ("デュ", "dy", "u"),
139
+ ("デャ", "dy", "a"),
140
+ # ("デェ", "dy", "e"),
141
+ ("ディ", "d", "i"),
142
+ ("デ", "d", "e"),
143
+ ("テョ", "ty", "o"),
144
+ ("テュ", "ty", "u"),
145
+ ("テャ", "ty", "a"),
146
+ ("ティ", "t", "i"),
147
+ ("テ", "t", "e"),
148
+ ("ツォ", "ts", "o"),
149
+ ("ツェ", "ts", "e"),
150
+ ("ツィ", "ts", "i"),
151
+ ("ツァ", "ts", "a"),
152
+ ("ツ", "ts", "u"),
153
+ ("ッ", None, "q"), # 「cl」から「q」に変更
154
+ ("チョ", "ch", "o"),
155
+ ("チュ", "ch", "u"),
156
+ ("チャ", "ch", "a"),
157
+ ("チェ", "ch", "e"),
158
+ ("チ", "ch", "i"),
159
+ ("ダ", "d", "a"),
160
+ ("タ", "t", "a"),
161
+ ("ゾ", "z", "o"),
162
+ ("ソ", "s", "o"),
163
+ ("ゼ", "z", "e"),
164
+ ("セ", "s", "e"),
165
+ ("ズィ", "z", "i"),
166
+ ("ズ", "z", "u"),
167
+ ("スィ", "s", "i"),
168
+ ("ス", "s", "u"),
169
+ ("ジョ", "j", "o"),
170
+ ("ジュ", "j", "u"),
171
+ ("ジャ", "j", "a"),
172
+ ("ジェ", "j", "e"),
173
+ ("ジ", "j", "i"),
174
+ ("ショ", "sh", "o"),
175
+ ("シュ", "sh", "u"),
176
+ ("シャ", "sh", "a"),
177
+ ("シェ", "sh", "e"),
178
+ ("シ", "sh", "i"),
179
+ ("ザ", "z", "a"),
180
+ ("サ", "s", "a"),
181
+ ("ゴ", "g", "o"),
182
+ ("コ", "k", "o"),
183
+ ("ゲ", "g", "e"),
184
+ ("ケ", "k", "e"),
185
+ ("グヮ", "gw", "a"),
186
+ ("グ", "g", "u"),
187
+ ("クヮ", "kw", "a"),
188
+ ("ク", "k", "u"),
189
+ ("ギョ", "gy", "o"),
190
+ ("ギュ", "gy", "u"),
191
+ ("ギャ", "gy", "a"),
192
+ ("ギェ", "gy", "e"),
193
+ ("ギ", "g", "i"),
194
+ ("キョ", "ky", "o"),
195
+ ("キュ", "ky", "u"),
196
+ ("キャ", "ky", "a"),
197
+ ("キェ", "ky", "e"),
198
+ ("キ", "k", "i"),
199
+ ("ガ", "g", "a"),
200
+ ("カ", "k", "a"),
201
+ ("オ", None, "o"),
202
+ ("エ", None, "e"),
203
+ ("ウォ", "w", "o"),
204
+ ("ウェ", "w", "e"),
205
+ ("ウィ", "w", "i"),
206
+ ("ウ", None, "u"),
207
+ ("イェ", "y", "e"),
208
+ ("イ", None, "i"),
209
+ ("ア", None, "a"),
210
+ ]
211
+
212
+ _mora_list_additional: list[tuple[str, Optional[str], str]] = [
213
+ ("ヴョ", "by", "o"),
214
+ ("ヴュ", "by", "u"),
215
+ ("ヴャ", "by", "a"),
216
+ ("ヲ", None, "o"),
217
+ ("ヱ", None, "e"),
218
+ ("ヰ", None, "i"),
219
+ ("ヮ", "w", "a"),
220
+ ("ョ", "y", "o"),
221
+ ("ュ", "y", "u"),
222
+ ("ヅ", "z", "u"),
223
+ ("ヂ", "j", "i"),
224
+ ("ヶ", "k", "e"),
225
+ ("ャ", "y", "a"),
226
+ ("ォ", None, "o"),
227
+ ("ェ", None, "e"),
228
+ ("ゥ", None, "u"),
229
+ ("ィ", None, "i"),
230
+ ("ァ", None, "a"),
231
+ ]
232
+
233
+ # 例: "vo" -> "ヴォ", "a" -> "ア"
234
+ mora_phonemes_to_mora_kata: dict[str, str] = {
235
+ (consonant or "") + vowel: kana for [kana, consonant, vowel] in _mora_list_minimum
236
+ }
237
+
238
+ # 例: "ヴォ" -> ("v", "o"), "ア" -> (None, "a")
239
+ mora_kata_to_mora_phonemes: dict[str, tuple[Optional[str], str]] = {
240
+ kana: (consonant, vowel)
241
+ for [kana, consonant, vowel] in _mora_list_minimum + _mora_list_additional
242
+ }
243
+
244
+
245
+ # 正規化で記号を変換するための辞書
246
+ rep_map = {
247
+ ":": ":",
248
+ ";": ";",
249
+ ",": ",",
250
+ "。": ".",
251
+ "!": "!",
252
+ "?": "?",
253
+ "\n": ".",
254
+ ".": ".",
255
+ "⋯": "…",
256
+ "···": "…",
257
+ "・・・": "…",
258
+ "·": ",",
259
+ "・": ",",
260
+ "•": ",",
261
+ "、": ",",
262
+ "$": ".",
263
+ # "“": "'",
264
+ # "”": "'",
265
+ # '"': "'",
266
+ "‘": "'",
267
+ "’": "'",
268
+ # "(": "'",
269
+ # ")": "'",
270
+ # "(": "'",
271
+ # ")": "'",
272
+ # "《": "'",
273
+ # "》": "'",
274
+ # "【": "'",
275
+ # "】": "'",
276
+ # "[": "'",
277
+ # "]": "'",
278
+ # "——": "-",
279
+ # "−": "-",
280
+ # "-": "-",
281
+ # "『": "'",
282
+ # "』": "'",
283
+ # "〈": "'",
284
+ # "〉": "'",
285
+ # "«": "'",
286
+ # "»": "'",
287
+ # # "~": "-", # これは長音記号「ー」として扱うよう変更
288
+ # # "~": "-", # これは長音記号「ー」として扱うよう変更
289
+ # "「": "'",
290
+ # "」": "'",
291
+ }
292
+
293
+
294
+ def _numeric_feature_by_regex(regex, s):
295
+ match = re.search(regex, s)
296
+ if match is None:
297
+ return -50
298
+ return int(match.group(1))
299
+
300
+
301
+ def replace_punctuation(text: str) -> str:
302
+ """句読点等を「.」「,」「!」「?」「'」「-」に正規化し、OpenJTalkで読みが取得できるもののみ残す:
303
+ 漢字・平仮名・カタカナ、アルファベット、ギリシャ文字
304
+ """
305
+ pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
306
+ # print("before: ", text)
307
+ # 句読点を辞書で置換
308
+ replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
309
+
310
+ replaced_text = re.sub(
311
+ # ↓ ひらがな、カタカナ、漢字
312
+ r"[^\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u3400-\u4DBF\u3005"
313
+ # ↓ 半角アルファベット(大文字と小文字)
314
+ + r"\u0041-\u005A\u0061-\u007A"
315
+ # ↓ 全角アルファベット(大文字と小文字)
316
+ + r"\uFF21-\uFF3A\uFF41-\uFF5A"
317
+ # ↓ ギリシャ文字
318
+ + r"\u0370-\u03FF\u1F00-\u1FFF"
319
+ # ↓ "!", "?", "…", ",", ".", "'", "-", 但し`…`はすでに`...`に変換されている
320
+ + "".join(punctuation) + r"]+",
321
+ # 上述以外の文字を削除
322
+ "",
323
+ replaced_text,
324
+ )
325
+ # print("after: ", replaced_text)
326
+ return replaced_text
327
+
328
+
329
+ def fix_phone_tone(phone_tone_list: list[tuple[str, int]]) -> list[tuple[str, int]]:
330
+ """
331
+ `phone_tone_list`のtone(アクセントの値)を0か1の範囲に修正する。
332
+ 例: [(a, 0), (i, -1), (u, -1)] → [(a, 1), (i, 0), (u, 0)]
333
+ """
334
+ tone_values = set(tone for _, tone in phone_tone_list)
335
+ if len(tone_values) == 1:
336
+ assert tone_values == {0}, tone_values
337
+ return phone_tone_list
338
+ elif len(tone_values) == 2:
339
+ if tone_values == {0, 1}:
340
+ return phone_tone_list
341
+ elif tone_values == {-1, 0}:
342
+ return [
343
+ (letter, 0 if tone == -1 else 1) for letter, tone in phone_tone_list
344
+ ]
345
+ else:
346
+ raise ValueError(f"Unexpected tone values: {tone_values}")
347
+ else:
348
+ raise ValueError(f"Unexpected tone values: {tone_values}")
349
+
350
+
351
+ def fix_phone_tone_wplen(phone_tone_list, word_phone_length_list):
352
+ phones = []
353
+ tones = []
354
+ w_p_len = []
355
+ p_len = len(phone_tone_list)
356
+ idx = 0
357
+ w_idx = 0
358
+ while idx < p_len:
359
+ offset = 0
360
+ if phone_tone_list[idx] == "▁":
361
+ w_p_len.append(w_idx + 1)
362
+
363
+ curr_w_p_len = word_phone_length_list[w_idx]
364
+ for i in range(curr_w_p_len):
365
+ p, t = phone_tone_list[idx]
366
+ if p == ":" and len(phones) > 0:
367
+ if phones[-1][-1] != ":":
368
+ phones[-1] += ":"
369
+ offset -= 1
370
+ else:
371
+ phones.append(p)
372
+ tones.append(str(t))
373
+ idx += 1
374
+ if idx >= p_len:
375
+ break
376
+ w_p_len.append(curr_w_p_len + offset)
377
+ w_idx += 1
378
+ # print(w_p_len)
379
+ return phones, tones, w_p_len
380
+
381
+
382
+ def g2phone_tone_wo_punct(prosodies) -> list[tuple[str, int]]:
383
+ """
384
+ テキストに対して、音素とアクセント(0か1)のペアのリストを返す。
385
+ ただし「!」「.」「?」等の非音素記号(punctuation)は全て消える(ポーズ記号も残さない)。
386
+ 非音素記号を含める処理は`align_tones()`で行われる。
387
+ また「っ」は「cl」でなく「q」に変換される(「ん」は「N」のまま)。
388
+ 例: "こんにちは、世界ー。。元気?!" →
389
+ [('k', 0), ('o', 0), ('N', 1), ('n', 1), ('i', 1), ('ch', 1), ('i', 1), ('w', 1), ('a', 1), ('s', 1), ('e', 1), ('k', 0), ('a', 0), ('i', 0), ('i', 0), ('g', 1), ('e', 1), ('N', 0), ('k', 0), ('i', 0)]
390
+ """
391
+ result: list[tuple[str, int]] = []
392
+ current_phrase: list[tuple[str, int]] = []
393
+ current_tone = 0
394
+ last_accent = ""
395
+ for i, letter in enumerate(prosodies):
396
+ # 特殊記号の処理
397
+
398
+ # 文頭記号、無視する
399
+ if letter == "^":
400
+ assert i == 0, "Unexpected ^"
401
+ # アクセント句の終わりに来る記号
402
+ elif letter in ("$", "?", "_", "#"):
403
+ # 保持しているフレーズを、アクセント数値を0-1に修正し結果に追加
404
+ result.extend(fix_phone_tone(current_phrase))
405
+ # 末尾に来る終了記号、無視(文中の疑問文は`_`になる)
406
+ if letter in ("$", "?"):
407
+ assert i == len(prosodies) - 1, f"Unexpected {letter}"
408
+ # あとは"_"(ポーズ)と"#"(アクセント句の境界)のみ
409
+ # これらは残さず、次のアクセント句に備える。
410
+
411
+ current_phrase = []
412
+ # 0を基準点にしてそこから上昇・下降する(負の場合は上の`fix_phone_tone`で直る)
413
+ current_tone = 0
414
+ last_accent = ""
415
+ # アクセント上昇記号
416
+ elif letter == "[":
417
+ if last_accent != letter:
418
+ current_tone = current_tone + 1
419
+ last_accent = letter
420
+ # アクセント下降記号
421
+ elif letter == "]":
422
+ if last_accent != letter:
423
+ current_tone = current_tone - 1
424
+ last_accent = letter
425
+ # それ以外は通常の音素
426
+ else:
427
+ if letter == "cl": # 「っ」の処理
428
+ letter = "q"
429
+ current_phrase.append((letter, current_tone))
430
+ return result
431
+
432
+
433
+ def handle_long(sep_phonemes: list[list[str]]) -> list[list[str]]:
434
+ for i in range(len(sep_phonemes)):
435
+ if sep_phonemes[i][0] == "ー":
436
+ # sep_phonemes[i][0] = sep_phonemes[i - 1][-1]
437
+ sep_phonemes[i][0] = ":"
438
+ if "ー" in sep_phonemes[i]:
439
+ for j in range(len(sep_phonemes[i])):
440
+ if sep_phonemes[i][j] == "ー":
441
+ # sep_phonemes[i][j] = sep_phonemes[i][j - 1][-1]
442
+ sep_phonemes[i][j] = ":"
443
+ return sep_phonemes
444
+
445
+
446
+ def handle_long_word(sep_phonemes: list[list[str]]) -> list[list[str]]:
447
+ res = []
448
+ for i in range(len(sep_phonemes)):
449
+ if sep_phonemes[i][0] == "ー":
450
+ sep_phonemes[i][0] = sep_phonemes[i - 1][-1]
451
+ # sep_phonemes[i][0] = ':'
452
+ if "ー" in sep_phonemes[i]:
453
+ for j in range(len(sep_phonemes[i])):
454
+ if sep_phonemes[i][j] == "ー":
455
+ sep_phonemes[i][j] = sep_phonemes[i][j - 1][-1]
456
+ # sep_phonemes[i][j] = ':'
457
+ res.append(sep_phonemes[i])
458
+ res.append("▁")
459
+ return res
460
+
461
+
462
+ def align_tones(
463
+ phones_with_punct: list[str], phone_tone_list: list[tuple[str, int]]
464
+ ) -> list[tuple[str, int]]:
465
+ """
466
+ 例:
467
+ …私は、、そう思う。
468
+ phones_with_punct:
469
+ [".", ".", ".", "w", "a", "t", "a", "sh", "i", "w", "a", ",", ",", "s", "o", "o", "o", "m", "o", "u", "."]
470
+ phone_tone_list:
471
+ [("w", 0), ("a", 0), ("t", 1), ("a", 1), ("sh", 1), ("i", 1), ("w", 1), ("a", 1), ("s", 0), ("o", 0), ("o", 1), ("o", 1), ("m", 1), ("o", 1), ("u", 0))]
472
+ Return:
473
+ [(".", 0), (".", 0), (".", 0), ("w", 0), ("a", 0), ("t", 1), ("a", 1), ("sh", 1), ("i", 1), ("w", 1), ("a", 1), (",", 0), (",", 0), ("s", 0), ("o", 0), ("o", 1), ("o", 1), ("m", 1), ("o", 1), ("u", 0), (".", 0)]
474
+ """
475
+ result: list[tuple[str, int]] = []
476
+ tone_index = 0
477
+ for phone in phones_with_punct:
478
+ if tone_index >= len(phone_tone_list):
479
+ # 余ったpunctuationがある場合 → (punctuation, 0)を追加
480
+ result.append((phone, 0))
481
+ elif phone == phone_tone_list[tone_index][0]:
482
+ # phone_tone_listの現在の音素と一致する場合 → toneをそこから取得、(phone, tone)を追加
483
+ result.append((phone, phone_tone_list[tone_index][1]))
484
+ # 探すindexを1つ進める
485
+ tone_index += 1
486
+ elif phone in punctuation or phone == "▁":
487
+ # phoneがpunctuationの場合 → (phone, 0)を追加
488
+ result.append((phone, 0))
489
+ else:
490
+ print(f"phones: {phones_with_punct}")
491
+ print(f"phone_tone_list: {phone_tone_list}")
492
+ print(f"result: {result}")
493
+ print(f"tone_index: {tone_index}")
494
+ print(f"phone: {phone}")
495
+ raise ValueError(f"Unexpected phone: {phone}")
496
+ return result
497
+
498
+
499
+ def kata2phoneme_list(text: str) -> list[str]:
500
+ """
501
+ 原則カタカナの`text`を受け取り、それをそのままいじらずに音素記号のリストに変換。
502
+ 注意点:
503
+ - punctuationが来た場合(punctuationが1文字の場合がありうる)、処理せず1文字のリストを返す
504
+ - 冒頭に続く「ー」はそのまま「ー」のままにする(`handle_long()`で処理される)
505
+ - 文中の「ー」は前の音素記号の最後の音素記号に変換される。
506
+ 例:
507
+ `ーーソーナノカーー` → ["ー", "ー", "s", "o", "o", "n", "a", "n", "o", "k", "a", "a", "a"]
508
+ `?` → ["?"]
509
+ """
510
+ if text in punctuation:
511
+ return [text]
512
+ # `text`がカタカナ(`ー`含む)のみからなるかどうかをチェック
513
+ if re.fullmatch(r"[\u30A0-\u30FF]+", text) is None:
514
+ raise ValueError(f"Input must be katakana only: {text}")
515
+ sorted_keys = sorted(mora_kata_to_mora_phonemes.keys(), key=len, reverse=True)
516
+ pattern = "|".join(map(re.escape, sorted_keys))
517
+
518
+ def mora2phonemes(mora: str) -> str:
519
+ cosonant, vowel = mora_kata_to_mora_phonemes[mora]
520
+ if cosonant is None:
521
+ return f" {vowel}"
522
+ return f" {cosonant} {vowel}"
523
+
524
+ spaced_phonemes = re.sub(pattern, lambda m: mora2phonemes(m.group()), text)
525
+
526
+ # 長音記号「ー」の処理
527
+ long_pattern = r"(\w)(ー*)"
528
+ long_replacement = lambda m: m.group(1) + (" " + m.group(1)) * len(m.group(2))
529
+ spaced_phonemes = re.sub(long_pattern, long_replacement, spaced_phonemes)
530
+ # spaced_phonemes += ' ▁'
531
+ return spaced_phonemes.strip().split(" ")
532
+
533
+
534
+ def frontend2phoneme(labels, drop_unvoiced_vowels=False):
535
+ N = len(labels)
536
+
537
+ phones = []
538
+ for n in range(N):
539
+ lab_curr = labels[n]
540
+ # print(lab_curr)
541
+ # current phoneme
542
+ p3 = re.search(r"\-(.*?)\+", lab_curr).group(1)
543
+
544
+ # deal unvoiced vowels as normal vowels
545
+ if drop_unvoiced_vowels and p3 in "AEIOU":
546
+ p3 = p3.lower()
547
+
548
+ # deal with sil at the beginning and the end of text
549
+ if p3 == "sil":
550
+ # assert n == 0 or n == N - 1
551
+ # if n == 0:
552
+ # phones.append("^")
553
+ # elif n == N - 1:
554
+ # # check question form or not
555
+ # e3 = _numeric_feature_by_regex(r"!(\d+)_", lab_curr)
556
+ # if e3 == 0:
557
+ # phones.append("$")
558
+ # elif e3 == 1:
559
+ # phones.append("?")
560
+ continue
561
+ elif p3 == "pau":
562
+ phones.append("_")
563
+ continue
564
+ else:
565
+ phones.append(p3)
566
+
567
+ # accent type and position info (forward or backward)
568
+ a1 = _numeric_feature_by_regex(r"/A:([0-9\-]+)\+", lab_curr)
569
+ a2 = _numeric_feature_by_regex(r"\+(\d+)\+", lab_curr)
570
+ a3 = _numeric_feature_by_regex(r"\+(\d+)/", lab_curr)
571
+
572
+ # number of mora in accent phrase
573
+ f1 = _numeric_feature_by_regex(r"/F:(\d+)_", lab_curr)
574
+
575
+ a2_next = _numeric_feature_by_regex(r"\+(\d+)\+", labels[n + 1])
576
+ # accent phrase border
577
+ # print(p3, a1, a2, a3, f1, a2_next, lab_curr)
578
+ if a3 == 1 and a2_next == 1 and p3 in "aeiouAEIOUNcl":
579
+ phones.append("#")
580
+ # pitch falling
581
+ elif a1 == 0 and a2_next == a2 + 1 and a2 != f1:
582
+ phones.append("]")
583
+ # pitch rising
584
+ elif a2 == 1 and a2_next == 2:
585
+ phones.append("[")
586
+
587
+ # phones = ' '.join(phones)
588
+ return phones
589
+
590
+
591
+ class JapanesePhoneConverter(object):
592
+ def __init__(self, lexicon_path=None, ipa_dict_path=None):
593
+ # lexicon_lines = open(lexicon_path, 'r', encoding='utf-8').readlines()
594
+ # self.lexicon = {}
595
+ # self.single_dict = {}
596
+ # self.double_dict = {}
597
+ # for curr_line in lexicon_lines:
598
+ # k,v = curr_line.strip().split('+',1)
599
+ # self.lexicon[k] = v
600
+ # if len(k) == 2:
601
+ # self.double_dict[k] = v
602
+ # elif len(k) == 1:
603
+ # self.single_dict[k] = v
604
+ self.ipa_dict = {}
605
+ for curr_line in jp_xphone2ipa:
606
+ k, v = curr_line.strip().split(" ", 1)
607
+ self.ipa_dict[k] = re.sub("\s", "", v)
608
+ # kakasi1 = kakasi()
609
+ # kakasi1.setMode("H","K")
610
+ # kakasi1.setMode("J","K")
611
+ # kakasi1.setMode("r","Hepburn")
612
+ self.japan_JH2K = kakasi()
613
+ self.table = {ord(f): ord(t) for f, t in zip("67", "_¯")}
614
+
615
+ def text2sep_kata(self, parsed) -> tuple[list[str], list[str]]:
616
+ """
617
+ `text_normalize`で正規化済みの`norm_text`を受け取り、それを単語分割し、
618
+ 分割された単語リストとその読み(カタカナor記号1文字)のリス���のタプルを返す。
619
+ 単語分割結果は、`g2p()`の`word2ph`で1文字あたりに割り振る音素記号の数を決めるために使う。
620
+ 例:
621
+ `私はそう思う!って感じ?` →
622
+ ["私", "は", "そう", "思う", "!", "って", "感じ", "?"], ["ワタシ", "ワ", "ソー", "オモウ", "!", "ッテ", "カンジ", "?"]
623
+ """
624
+ # parsed: OpenJTalkの解析結果
625
+ sep_text: list[str] = []
626
+ sep_kata: list[str] = []
627
+ fix_parsed = []
628
+ i = 0
629
+ while i <= len(parsed) - 1:
630
+ # word: 実際の単語の文字列
631
+ # yomi: その読み、但し無声化サインの`’`は除去
632
+ # print(parsed)
633
+ yomi = parsed[i]["pron"]
634
+ tmp_parsed = parsed[i]
635
+ if i != len(parsed) - 1 and parsed[i + 1]["string"] in [
636
+ "々",
637
+ "ゝ",
638
+ "ヽ",
639
+ "ゞ",
640
+ "ヾ",
641
+ "゛",
642
+ ]:
643
+ word = parsed[i]["string"] + parsed[i + 1]["string"]
644
+ i += 1
645
+ else:
646
+ word = parsed[i]["string"]
647
+ word, yomi = replace_punctuation(word), yomi.replace("’", "")
648
+ """
649
+ ここで`yomi`の取りうる値は以下の通りのはず。
650
+ - `word`が通常単語 → 通常の読み(カタカナ)
651
+ (カタカナからなり、長音記号も含みうる、`アー` 等)
652
+ - `word`が`ー` から始まる → `ーラー` や `ーーー` など
653
+ - `word`が句読点や空白等 → `、`
654
+ - `word`が`?` → `?`(全角になる)
655
+ 他にも`word`が読めないキリル文字アラビア文字等が来ると`、`になるが、正規化でこの場合は起きないはず。
656
+ また元のコードでは`yomi`が空白の場合の処理があったが、これは起きないはず。
657
+ 処理すべきは`yomi`が`、`の場合のみのはず。
658
+ """
659
+ assert yomi != "", f"Empty yomi: {word}"
660
+ if yomi == "、":
661
+ # wordは正規化されているので、`.`, `,`, `!`, `'`, `-`のいずれか
662
+ if word not in (
663
+ ".",
664
+ ",",
665
+ "!",
666
+ "'",
667
+ "-",
668
+ "?",
669
+ ":",
670
+ ";",
671
+ "…",
672
+ "",
673
+ ):
674
+ # ここはpyopenjtalkが読めない文字等のときに起こる
675
+ #print(
676
+ # "{}Cannot read:{}, yomi:{}, new_word:{};".format(
677
+ # parsed, word, yomi, self.japan_JH2K.convert(word)[0]["kana"]
678
+ # )
679
+ #)
680
+ # raise ValueError(word)
681
+ word = self.japan_JH2K.convert(word)[0]["kana"]
682
+ # print(word, self.japan_JH2K.convert(word)[0]['kana'], kata2phoneme_list(self.japan_JH2K.convert(word)[0]['kana']))
683
+ tmp_parsed["pron"] = word
684
+ # yomi = "-"
685
+ # word = ','
686
+ # yomiは元の記号のままに変更
687
+ # else:
688
+ # parsed[i]['pron'] = parsed[i]["string"]
689
+ yomi = word
690
+ elif yomi == "?":
691
+ assert word == "?", f"yomi `?` comes from: {word}"
692
+ yomi = "?"
693
+ if word == "":
694
+ i += 1
695
+ continue
696
+ sep_text.append(word)
697
+ sep_kata.append(yomi)
698
+ # print(word, yomi, parts)
699
+ fix_parsed.append(tmp_parsed)
700
+ i += 1
701
+ # print(sep_text, sep_kata)
702
+ return sep_text, sep_kata, fix_parsed
703
+
704
+ def getSentencePhone(self, sentence, blank_mode=True, phoneme_mode=False):
705
+ # print("origin:", sentence)
706
+ words = []
707
+ words_phone_len = []
708
+ short_char_flag = False
709
+ output_duration_flag = []
710
+ output_before_sil_flag = []
711
+ normed_text = []
712
+ sentence = sentence.strip().strip("'")
713
+ sentence = re.sub(r"\s+", "", sentence)
714
+ output_res = []
715
+ failed_words = []
716
+ last_long_pause = 4
717
+ last_word = None
718
+ frontend_text = pyopenjtalk.run_frontend(sentence)
719
+ # print("frontend_text: ", frontend_text)
720
+ try:
721
+ frontend_text = pyopenjtalk.estimate_accent(frontend_text)
722
+ except:
723
+ pass
724
+ # print("estimate_accent: ", frontend_text)
725
+ # sep_text: 単語単位の単語のリスト
726
+ # sep_kata: 単語単位の単語のカタカナ読みのリスト
727
+ sep_text, sep_kata, frontend_text = self.text2sep_kata(frontend_text)
728
+ # print("sep_text: ", sep_text)
729
+ # print("sep_kata: ", sep_kata)
730
+ # print("frontend_text: ", frontend_text)
731
+ # sep_phonemes: 各単語ご���の音素のリストのリスト
732
+ sep_phonemes = handle_long_word([kata2phoneme_list(i) for i in sep_kata])
733
+ # print("sep_phonemes: ", sep_phonemes)
734
+
735
+ pron_text = [x["pron"].strip().replace("’", "") for x in frontend_text]
736
+ # pdb.set_trace()
737
+ prosodys = pyopenjtalk.make_label(frontend_text)
738
+ prosodys = frontend2phoneme(prosodys, drop_unvoiced_vowels=True)
739
+ # print("prosodys: ", ' '.join(prosodys))
740
+ # print("pron_text: ", pron_text)
741
+ normed_text = [x["string"].strip() for x in frontend_text]
742
+ # list of (phoneme, accent) tuples with all punctuation removed
743
+ phone_tone_list_wo_punct = g2phone_tone_wo_punct(prosodys)
744
+ # print("phone_tone_list_wo_punct: ", phone_tone_list_wo_punct)
745
+
746
+ # phone_w_punct: phoneme sequence built by concatenating sep_phonemes, with punctuation kept as-is
747
+ phone_w_punct: list[str] = []
748
+ w_p_len = []
749
+ for i in sep_phonemes:
750
+ phone_w_punct += i
751
+ w_p_len.append(len(i))
752
+ phone_w_punct = phone_w_punct[:-1]
753
+ # use the punctuation-free accent info to build accent info that includes punctuation
754
+ # print("phone_w_punct: ", phone_w_punct)
755
+ # print("phone_tone_list_wo_punct: ", phone_tone_list_wo_punct)
756
+ phone_tone_list = align_tones(phone_w_punct, phone_tone_list_wo_punct)
757
+
758
+ jp_item = {}
759
+ jp_p = ""
760
+ jp_t = ""
761
+ # mye rye pye bye nye
762
+ # je she
763
+ # print(phone_tone_list)
764
+ for p, t in phone_tone_list:
765
+ if p in self.ipa_dict:
766
+ curr_p = self.ipa_dict[p]
767
+ jp_p += curr_p
768
+ jp_t += str(t + 6) * len(curr_p)
769
+ elif p in punctuation:
770
+ jp_p += p
771
+ jp_t += "0"
772
+ elif p == "▁":
773
+ jp_p += p
774
+ jp_t += " "
775
+ else:
776
+ print(p, t)
777
+ jp_p += "|"
778
+ jp_t += "0"
779
+ # return phones, tones, w_p_len
780
+ jp_p = jp_p.replace("▁", " ")
781
+ jp_t = jp_t.translate(self.table)
782
+ jp_l = ""
783
+ for t in jp_t:
784
+ if t == " ":
785
+ jp_l += " "
786
+ else:
787
+ jp_l += "2"
788
+ # print(jp_p)
789
+ # print(jp_t)
790
+ # print(jp_l)
791
+ # print(len(jp_p_len), sum(w_p_len), len(jp_p), sum(jp_p_len))
792
+ assert len(jp_p) == len(jp_t) and len(jp_p) == len(jp_l)
793
+
794
+ jp_item["jp_p"] = jp_p.replace("| |", "|").rstrip("|")
795
+ jp_item["jp_t"] = jp_t
796
+ jp_item["jp_l"] = jp_l
797
+ jp_item["jp_normed_text"] = " ".join(normed_text)
798
+ jp_item["jp_pron_text"] = " ".join(pron_text)
799
+ # jp_item['jp_ruoma'] = sep_phonemes
800
+ # print(len(normed_text), len(sep_phonemes))
801
+ # print(normed_text)
802
+ return jp_item
803
+
804
+
805
+ jpc = JapanesePhoneConverter()
806
+
807
+
808
+ def japanese_to_ipa(text, text_tokenizer):
809
+ # phonemes = text_tokenizer(text)
810
+ if type(text) == str:
811
+ return jpc.getSentencePhone(text)["jp_p"]
812
+ else:
813
+ result_ph = []
814
+ for t in text:
815
+ result_ph.append(jpc.getSentencePhone(t)["jp_p"])
816
+ return result_ph
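A minimal usage sketch for japanese.py: importing the module builds the module-level `jpc` converter, so it assumes pyopenjtalk and the module's other dependencies are installed; note that `japanese_to_ipa` ignores its `text_tokenizer` argument and always routes through `jpc.getSentencePhone`.

# Usage sketch (assumes pyopenjtalk and its dictionary are installed)
from diffrhythm.g2p.g2p.japanese import japanese_to_ipa

ipa = japanese_to_ipa("私はそう思う", text_tokenizer=None)  # text_tokenizer is unused for Japanese
print(ipa)  # phoneme string taken from jpc.getSentencePhone(...)["jp_p"]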
diffrhythm/g2p/g2p/korean.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import re
7
+
8
+ """
9
+ Text clean time
10
+ """
11
+ english_dictionary = {
12
+ "KOREA": "코리아",
13
+ "IDOL": "아이돌",
14
+ "IT": "아이티",
15
+ "IQ": "아이큐",
16
+ "UP": "업",
17
+ "DOWN": "다운",
18
+ "PC": "피씨",
19
+ "CCTV": "씨씨티비",
20
+ "SNS": "에스엔에스",
21
+ "AI": "에이아이",
22
+ "CEO": "씨이오",
23
+ "A": "에이",
24
+ "B": "비",
25
+ "C": "씨",
26
+ "D": "디",
27
+ "E": "이",
28
+ "F": "에프",
29
+ "G": "지",
30
+ "H": "에이치",
31
+ "I": "아이",
32
+ "J": "제이",
33
+ "K": "케이",
34
+ "L": "엘",
35
+ "M": "엠",
36
+ "N": "엔",
37
+ "O": "오",
38
+ "P": "피",
39
+ "Q": "큐",
40
+ "R": "알",
41
+ "S": "에스",
42
+ "T": "티",
43
+ "U": "유",
44
+ "V": "브이",
45
+ "W": "더블유",
46
+ "X": "엑스",
47
+ "Y": "와이",
48
+ "Z": "제트",
49
+ }
50
+
51
+
52
+ def normalize(text):
53
+ text = text.strip()
54
+ text = re.sub(
55
+ "[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]", "", text
56
+ )
57
+ text = normalize_english(text)
58
+ text = text.lower()
59
+ return text
60
+
61
+
62
+ def normalize_english(text):
63
+ def fn(m):
64
+ word = m.group()
65
+ if word in english_dictionary:
66
+ return english_dictionary.get(word)
67
+ return word
68
+
69
+ text = re.sub("([A-Za-z]+)", fn, text)
70
+ return text
71
+
72
+
73
+ def korean_to_ipa(text, text_tokenizer):
74
+ if type(text) == str:
75
+ text = normalize(text)
76
+ phonemes = text_tokenizer(text)
77
+ return phonemes
78
+ else:
79
+ for i, t in enumerate(text):
80
+ text[i] = normalize(t)
81
+ return text_tokenizer(text)
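A minimal usage sketch for korean.py: `korean_to_ipa` only normalizes the text (rewriting the English entries above to Hangul, then lowercasing) and delegates to whatever `text_tokenizer` is passed in. Pairing it with the espeak-backed TextTokenizer from text_tokenizers.py and the "ko" voice is an assumption, not something this upload pins down.

# Usage sketch (espeak-ng must be installed; the "ko" voice name is an assumption)
from diffrhythm.g2p.g2p.korean import korean_to_ipa
from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer

tokenizer = TextTokenizer(language="ko")      # espeak backend configured for Korean
print(korean_to_ipa("CCTV 뉴스", tokenizer))   # "CCTV" is rewritten to 씨씨티비 before phonemization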
diffrhythm/g2p/g2p/mandarin.py ADDED
@@ -0,0 +1,600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import re
7
+ import jieba
8
+ import cn2an
9
+ from pypinyin import lazy_pinyin, BOPOMOFO
10
+ from typing import List
11
+ from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict
12
+ from diffrhythm.g2p.utils.front_utils import *
13
+ import os
14
+ from huggingface_hub import hf_hub_download
15
+
16
+ # from g2pw import G2PWConverter
17
+
18
+
19
+ # set blank level, {0:"none",1:"char", 2:"word"}
20
+ BLANK_LEVEL = 0
21
+
22
+ # conv = G2PWConverter(style='pinyin', enable_non_tradional_chinese=True)
23
+ resource_path = r"./diffrhythm/g2p"
24
+ poly_all_class_path = os.path.join(
25
+ resource_path, "sources", "g2p_chinese_model", "polychar.txt"
26
+ )
27
+ if not os.path.exists(poly_all_class_path):
28
+ print(
29
+ "Incorrect path for polyphonic character class dictionary: {}, please check...".format(
30
+ poly_all_class_path
31
+ )
32
+ )
33
+ exit()
34
+ poly_dict = generate_poly_lexicon(poly_all_class_path)
35
+
36
+ # Set up G2PW model parameters
37
+ g2pw_poly_model_path = os.path.join(resource_path, "sources", "g2p_chinese_model")
38
+ if not os.path.exists(g2pw_poly_model_path):
39
+ print(
40
+ "Incorrect path for g2pw polyphonic character model: {}, please check...".format(
41
+ g2pw_poly_model_path
42
+ )
43
+ )
44
+ exit()
45
+
46
+ json_file_path = os.path.join(
47
+ resource_path, "sources", "g2p_chinese_model", "polydict.json"
48
+ )
49
+ if not os.path.exists(json_file_path):
50
+ print(
51
+ "Incorrect path for g2pw id to pinyin dictionary: {}, please check...".format(
52
+ json_file_path
53
+ )
54
+ )
55
+ exit()
56
+
57
+ jsonr_file_path = os.path.join(
58
+ resource_path, "sources", "g2p_chinese_model", "polydict_r.json"
59
+ )
60
+ if not os.path.exists(jsonr_file_path):
61
+ print(
62
+ "Incorrect path for g2pw pinyin to id dictionary: {}, please check...".format(
63
+ jsonr_file_path
64
+ )
65
+ )
66
+ exit()
67
+
68
+ g2pw_poly_predict = BertPolyPredict(
69
+ g2pw_poly_model_path, jsonr_file_path, json_file_path
70
+ )
71
+
72
+
73
+ """
74
+ Text clean time
75
+ """
76
+ # List of (Latin alphabet, bopomofo) pairs:
77
+ _latin_to_bopomofo = [
78
+ (re.compile("%s" % x[0], re.IGNORECASE), x[1])
79
+ for x in [
80
+ ("a", "ㄟˉ"),
81
+ ("b", "ㄅㄧˋ"),
82
+ ("c", "ㄙㄧˉ"),
83
+ ("d", "ㄉㄧˋ"),
84
+ ("e", "ㄧˋ"),
85
+ ("f", "ㄝˊㄈㄨˋ"),
86
+ ("g", "ㄐㄧˋ"),
87
+ ("h", "ㄝˇㄑㄩˋ"),
88
+ ("i", "ㄞˋ"),
89
+ ("j", "ㄐㄟˋ"),
90
+ ("k", "ㄎㄟˋ"),
91
+ ("l", "ㄝˊㄛˋ"),
92
+ ("m", "ㄝˊㄇㄨˋ"),
93
+ ("n", "ㄣˉ"),
94
+ ("o", "ㄡˉ"),
95
+ ("p", "ㄆㄧˉ"),
96
+ ("q", "ㄎㄧㄡˉ"),
97
+ ("r", "ㄚˋ"),
98
+ ("s", "ㄝˊㄙˋ"),
99
+ ("t", "ㄊㄧˋ"),
100
+ ("u", "ㄧㄡˉ"),
101
+ ("v", "ㄨㄧˉ"),
102
+ ("w", "ㄉㄚˋㄅㄨˋㄌㄧㄡˋ"),
103
+ ("x", "ㄝˉㄎㄨˋㄙˋ"),
104
+ ("y", "ㄨㄞˋ"),
105
+ ("z", "ㄗㄟˋ"),
106
+ ]
107
+ ]
108
+
109
+ # List of (bopomofo, ipa) pairs:
110
+ _bopomofo_to_ipa = [
111
+ (re.compile("%s" % x[0]), x[1])
112
+ for x in [
113
+ ("ㄅㄛ", "p⁼wo"),
114
+ ("ㄆㄛ", "pʰwo"),
115
+ ("ㄇㄛ", "mwo"),
116
+ ("ㄈㄛ", "fwo"),
117
+ ("ㄧㄢ", "|jɛn"),
118
+ ("ㄩㄢ", "|ɥæn"),
119
+ ("ㄧㄣ", "|in"),
120
+ ("ㄩㄣ", "|ɥn"),
121
+ ("ㄧㄥ", "|iŋ"),
122
+ ("ㄨㄥ", "|ʊŋ"),
123
+ ("ㄩㄥ", "|jʊŋ"),
124
+ # Add
125
+ ("ㄧㄚ", "|ia"),
126
+ ("ㄧㄝ", "|iɛ"),
127
+ ("ㄧㄠ", "|iɑʊ"),
128
+ ("ㄧㄡ", "|ioʊ"),
129
+ ("ㄧㄤ", "|iɑŋ"),
130
+ ("ㄨㄚ", "|ua"),
131
+ ("ㄨㄛ", "|uo"),
132
+ ("ㄨㄞ", "|uaɪ"),
133
+ ("ㄨㄟ", "|ueɪ"),
134
+ ("ㄨㄢ", "|uan"),
135
+ ("ㄨㄣ", "|uən"),
136
+ ("ㄨㄤ", "|uɑŋ"),
137
+ ("ㄩㄝ", "|ɥɛ"),
138
+ # End
139
+ ("ㄅ", "p⁼"),
140
+ ("ㄆ", "pʰ"),
141
+ ("ㄇ", "m"),
142
+ ("ㄈ", "f"),
143
+ ("ㄉ", "t⁼"),
144
+ ("ㄊ", "tʰ"),
145
+ ("ㄋ", "n"),
146
+ ("ㄌ", "l"),
147
+ ("ㄍ", "k⁼"),
148
+ ("ㄎ", "kʰ"),
149
+ ("ㄏ", "x"),
150
+ ("ㄐ", "tʃ⁼"),
151
+ ("ㄑ", "tʃʰ"),
152
+ ("ㄒ", "ʃ"),
153
+ ("ㄓ", "ts`⁼"),
154
+ ("ㄔ", "ts`ʰ"),
155
+ ("ㄕ", "s`"),
156
+ ("ㄖ", "ɹ`"),
157
+ ("ㄗ", "ts⁼"),
158
+ ("ㄘ", "tsʰ"),
159
+ ("ㄙ", "|s"),
160
+ ("ㄚ", "|a"),
161
+ ("ㄛ", "|o"),
162
+ ("ㄜ", "|ə"),
163
+ ("ㄝ", "|ɛ"),
164
+ ("ㄞ", "|aɪ"),
165
+ ("ㄟ", "|eɪ"),
166
+ ("ㄠ", "|ɑʊ"),
167
+ ("ㄡ", "|oʊ"),
168
+ ("ㄢ", "|an"),
169
+ ("ㄣ", "|ən"),
170
+ ("ㄤ", "|ɑŋ"),
171
+ ("ㄥ", "|əŋ"),
172
+ ("ㄦ", "əɹ"),
173
+ ("ㄧ", "|i"),
174
+ ("ㄨ", "|u"),
175
+ ("ㄩ", "|ɥ"),
176
+ ("ˉ", "→|"),
177
+ ("ˊ", "↑|"),
178
+ ("ˇ", "↓↑|"),
179
+ ("ˋ", "↓|"),
180
+ ("˙", "|"),
181
+ ]
182
+ ]
183
+ must_not_er_words = {"女儿", "老儿", "男儿", "少儿", "小儿"}
184
+
185
+
186
+ chinese_lexicon_path = hf_hub_download(
187
+ repo_id="ASLP-lab/DiffRhythm",
188
+ filename="diffrhythm/g2p/sources/chinese_lexicon.txt",
189
+ repo_type="space"
190
+ )
191
+ word_pinyin_dict = {}
192
+ with open(chinese_lexicon_path, "r", encoding="utf-8") as fread:
193
+ txt_list = fread.readlines()
194
+ for txt in txt_list:
195
+ word, pinyin = txt.strip().split("\t")
196
+ word_pinyin_dict[word] = pinyin
197
+ fread.close()
198
+
199
+ pinyin_2_bopomofo_dict = {}
200
+ with open(
201
+ r"./diffrhythm/g2p/sources/pinyin_2_bpmf.txt", "r", encoding="utf-8"
202
+ ) as fread:
203
+ txt_list = fread.readlines()
204
+ for txt in txt_list:
205
+ pinyin, bopomofo = txt.strip().split("\t")
206
+ pinyin_2_bopomofo_dict[pinyin] = bopomofo
207
+ fread.close()
208
+
209
+ tone_dict = {
210
+ "0": "˙",
211
+ "5": "˙",
212
+ "1": "",
213
+ "2": "ˊ",
214
+ "3": "ˇ",
215
+ "4": "ˋ",
216
+ }
217
+
218
+ bopomofos2pinyin_dict = {}
219
+ with open(
220
+ r"./diffrhythm/g2p/sources/bpmf_2_pinyin.txt", "r", encoding="utf-8"
221
+ ) as fread:
222
+ txt_list = fread.readlines()
223
+ for txt in txt_list:
224
+ v, k = txt.strip().split("\t")
225
+ bopomofos2pinyin_dict[k] = v
226
+ fread.close()
227
+
228
+
229
+ def bpmf_to_pinyin(text):
230
+ bopomofo_list = text.split("|")
231
+ pinyin_list = []
232
+ for info in bopomofo_list:
233
+ pinyin = ""
234
+ for c in info:
235
+ if c in bopomofos2pinyin_dict:
236
+ pinyin += bopomofos2pinyin_dict[c]
237
+ if len(pinyin) == 0:
238
+ continue
239
+ if pinyin[-1] not in "01234":
240
+ pinyin += "1"
241
+ if pinyin[:-1] == "ve":
242
+ pinyin = "y" + pinyin
243
+ if pinyin[:-1] == "sh":
244
+ pinyin = pinyin[:-1] + "i" + pinyin[-1]
245
+ if pinyin == "sh":
246
+ pinyin = pinyin[:-1] + "i"
247
+ if pinyin[:-1] == "s":
248
+ pinyin = "si" + pinyin[-1]
249
+ if pinyin[:-1] == "c":
250
+ pinyin = "ci" + pinyin[-1]
251
+ if pinyin[:-1] == "i":
252
+ pinyin = "yi" + pinyin[-1]
253
+ if pinyin[:-1] == "iou":
254
+ pinyin = "you" + pinyin[-1]
255
+ if pinyin[:-1] == "ien":
256
+ pinyin = "yin" + pinyin[-1]
257
+ if "iou" in pinyin and pinyin[-4:-1] == "iou":
258
+ pinyin = pinyin[:-4] + "iu" + pinyin[-1]
259
+ if "uei" in pinyin:
260
+ if pinyin[:-1] == "uei":
261
+ pinyin = "wei" + pinyin[-1]
262
+ elif pinyin[-4:-1] == "uei":
263
+ pinyin = pinyin[:-4] + "ui" + pinyin[-1]
264
+ if "uen" in pinyin and pinyin[-4:-1] == "uen":
265
+ if pinyin[:-1] == "uen":
266
+ pinyin = "wen" + pinyin[-1]
267
+ elif pinyin[-4:-1] == "uei":
268
+ pinyin = pinyin[:-4] + "un" + pinyin[-1]
269
+ if "van" in pinyin and pinyin[-4:-1] == "van":
270
+ if pinyin[:-1] == "van":
271
+ pinyin = "yuan" + pinyin[-1]
272
+ elif pinyin[-4:-1] == "van":
273
+ pinyin = pinyin[:-4] + "uan" + pinyin[-1]
274
+ if "ueng" in pinyin and pinyin[-5:-1] == "ueng":
275
+ pinyin = pinyin[:-5] + "ong" + pinyin[-1]
276
+ if pinyin[:-1] == "veng":
277
+ pinyin = "yong" + pinyin[-1]
278
+ if "veng" in pinyin and pinyin[-5:-1] == "veng":
279
+ pinyin = pinyin[:-5] + "iong" + pinyin[-1]
280
+ if pinyin[:-1] == "ieng":
281
+ pinyin = "ying" + pinyin[-1]
282
+ if pinyin[:-1] == "u":
283
+ pinyin = "wu" + pinyin[-1]
284
+ if pinyin[:-1] == "v":
285
+ pinyin = "yv" + pinyin[-1]
286
+ if pinyin[:-1] == "ing":
287
+ pinyin = "ying" + pinyin[-1]
288
+ if pinyin[:-1] == "z":
289
+ pinyin = "zi" + pinyin[-1]
290
+ if pinyin[:-1] == "zh":
291
+ pinyin = "zhi" + pinyin[-1]
292
+ if pinyin[0] == "u":
293
+ pinyin = "w" + pinyin[1:]
294
+ if pinyin[0] == "i":
295
+ pinyin = "y" + pinyin[1:]
296
+ pinyin = pinyin.replace("ien", "in")
297
+
298
+ pinyin_list.append(pinyin)
299
+ return " ".join(pinyin_list)
300
+
301
+
302
+ # Convert numbers to Chinese pronunciation
303
+ def number_to_chinese(text):
304
+ # numbers = re.findall(r'\d+(?:\.?\d+)?', text)
305
+ # for number in numbers:
306
+ # text = text.replace(number, cn2an.an2cn(number), 1)
307
+ text = cn2an.transform(text, "an2cn")
308
+ return text
309
+
310
+
311
+ def normalization(text):
312
+ text = text.replace(",", ",")
313
+ text = text.replace("。", ".")
314
+ text = text.replace("!", "!")
315
+ text = text.replace("?", "?")
316
+ text = text.replace(";", ";")
317
+ text = text.replace(":", ":")
318
+ text = text.replace("、", ",")
319
+ text = text.replace("‘", "'")
320
+ text = text.replace("’", "'")
321
+ text = text.replace("⋯", "…")
322
+ text = text.replace("···", "…")
323
+ text = text.replace("・・・", "…")
324
+ text = text.replace("...", "…")
325
+ text = re.sub(r"\s+", "", text)
326
+ text = re.sub(r"[^\u4e00-\u9fff\s_,\.\?!;:\'…]", "", text)
327
+ text = re.sub(r"\s*([,\.\?!;:\'…])\s*", r"\1", text)
328
+ return text
329
+
330
+
331
+ def change_tone(bopomofo: str, tone: str) -> str:
332
+ if bopomofo[-1] not in "˙ˊˇˋ":
333
+ bopomofo = bopomofo + tone
334
+ else:
335
+ bopomofo = bopomofo[:-1] + tone
336
+ return bopomofo
337
+
338
+
339
+ def er_sandhi(word: str, bopomofos: List[str]) -> List[str]:
340
+ if len(word) > 1 and word[-1] == "儿" and word not in must_not_er_words:
341
+ bopomofos[-1] = change_tone(bopomofos[-1], "˙")
342
+ return bopomofos
343
+
344
+
345
+ def bu_sandhi(word: str, bopomofos: List[str]) -> List[str]:
346
+ valid_char = set(word)
347
+ if len(valid_char) == 1 and "不" in valid_char:
348
+ pass
349
+ elif word in ["不字"]:
350
+ pass
351
+ elif len(word) == 3 and word[1] == "不" and bopomofos[1][:-1] == "ㄅㄨ":
352
+ bopomofos[1] = bopomofos[1][:-1] + "˙"
353
+ else:
354
+ for i, char in enumerate(word):
355
+ if (
356
+ i + 1 < len(bopomofos)
357
+ and char == "不"
358
+ and i + 1 < len(word)
359
+ and 0 < len(bopomofos[i + 1])
360
+ and bopomofos[i + 1][-1] == "ˋ"
361
+ ):
362
+ bopomofos[i] = bopomofos[i][:-1] + "ˊ"
363
+ return bopomofos
364
+
365
+
366
+ def yi_sandhi(word: str, bopomofos: List[str]) -> List[str]:
367
+ punc = ":,;。?!“”‘’':,;.?!()(){}【】[]-~`、 "
368
+ if word.find("一") != -1 and any(
369
+ [item.isnumeric() for item in word if item != "一"]
370
+ ):
371
+ for i in range(len(word)):
372
+ if (
373
+ i == 0
374
+ and word[0] == "一"
375
+ and len(word) > 1
376
+ and word[1]
377
+ not in [
378
+ "零",
379
+ "一",
380
+ "二",
381
+ "三",
382
+ "四",
383
+ "五",
384
+ "六",
385
+ "七",
386
+ "八",
387
+ "九",
388
+ "十",
389
+ ]
390
+ ):
391
+ if len(bopomofos[0]) > 0 and bopomofos[1][-1] in ["ˋ", "˙"]:
392
+ bopomofos[0] = change_tone(bopomofos[0], "ˊ")
393
+ else:
394
+ bopomofos[0] = change_tone(bopomofos[0], "ˋ")
395
+ elif word[i] == "一":
396
+ bopomofos[i] = change_tone(bopomofos[i], "")
397
+ return bopomofos
398
+ elif len(word) == 3 and word[1] == "一" and word[0] == word[-1]:
399
+ bopomofos[1] = change_tone(bopomofos[1], "˙")
400
+ elif word.startswith("第一"):
401
+ bopomofos[1] = change_tone(bopomofos[1], "")
402
+ elif word.startswith("一月") or word.startswith("一日") or word.startswith("一号"):
403
+ bopomofos[0] = change_tone(bopomofos[0], "")
404
+ else:
405
+ for i, char in enumerate(word):
406
+ if char == "一" and i + 1 < len(word):
407
+ if (
408
+ len(bopomofos) > i + 1
409
+ and len(bopomofos[i + 1]) > 0
410
+ and bopomofos[i + 1][-1] in {"ˋ"}
411
+ ):
412
+ bopomofos[i] = change_tone(bopomofos[i], "ˊ")
413
+ else:
414
+ if word[i + 1] not in punc:
415
+ bopomofos[i] = change_tone(bopomofos[i], "ˋ")
416
+ else:
417
+ pass
418
+ return bopomofos
419
+
420
+
421
+ def merge_bu(seg: List) -> List:
422
+ new_seg = []
423
+ last_word = ""
424
+ for word in seg:
425
+ if word != "不":
426
+ if last_word == "不":
427
+ word = last_word + word
428
+ new_seg.append(word)
429
+ last_word = word
430
+ return new_seg
431
+
432
+
433
+ def merge_er(seg: List) -> List:
434
+ new_seg = []
435
+ for i, word in enumerate(seg):
436
+ if i - 1 >= 0 and word == "儿":
437
+ new_seg[-1] = new_seg[-1] + seg[i]
438
+ else:
439
+ new_seg.append(word)
440
+ return new_seg
441
+
442
+
443
+ def merge_yi(seg: List) -> List:
444
+ new_seg = []
445
+ # function 1
446
+ for i, word in enumerate(seg):
447
+ if (
448
+ i - 1 >= 0
449
+ and word == "一"
450
+ and i + 1 < len(seg)
451
+ and seg[i - 1] == seg[i + 1]
452
+ ):
453
+ if i - 1 < len(new_seg):
454
+ new_seg[i - 1] = new_seg[i - 1] + "一" + new_seg[i - 1]
455
+ else:
456
+ new_seg.append(word)
457
+ new_seg.append(seg[i + 1])
458
+ else:
459
+ if i - 2 >= 0 and seg[i - 1] == "一" and seg[i - 2] == word:
460
+ continue
461
+ else:
462
+ new_seg.append(word)
463
+ seg = new_seg
464
+ new_seg = []
465
+ isnumeric_flag = False
466
+ for i, word in enumerate(seg):
467
+ if all([item.isnumeric() for item in word]) and not isnumeric_flag:
468
+ isnumeric_flag = True
469
+ new_seg.append(word)
470
+ else:
471
+ new_seg.append(word)
472
+ seg = new_seg
473
+ new_seg = []
474
+ # function 2
475
+ for i, word in enumerate(seg):
476
+ if new_seg and new_seg[-1] == "一":
477
+ new_seg[-1] = new_seg[-1] + word
478
+ else:
479
+ new_seg.append(word)
480
+ return new_seg
481
+
482
+
483
+ # Word segmentation, then convert each word's pronunciation to bopomofo
484
+ def chinese_to_bopomofo(text_short, sentence):
485
+ # bopomofos = conv(text_short)
486
+ words = jieba.lcut(text_short, cut_all=False)
487
+ words = merge_yi(words)
488
+ words = merge_bu(words)
489
+ words = merge_er(words)
490
+ text = ""
491
+
492
+ char_index = 0
493
+ for word in words:
494
+ bopomofos = []
495
+ if word in word_pinyin_dict and word not in poly_dict:
496
+ pinyin = word_pinyin_dict[word]
497
+ for py in pinyin.split(" "):
498
+ if py[:-1] in pinyin_2_bopomofo_dict and py[-1] in tone_dict:
499
+ bopomofos.append(
500
+ pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
501
+ )
502
+ if BLANK_LEVEL == 1:
503
+ bopomofos.append("_")
504
+ else:
505
+ bopomofos_lazy = lazy_pinyin(word, BOPOMOFO)
506
+ bopomofos += bopomofos_lazy
507
+ if BLANK_LEVEL == 1:
508
+ bopomofos.append("_")
509
+ else:
510
+ for i in range(len(word)):
511
+ c = word[i]
512
+ if c in poly_dict:
513
+ poly_pinyin = g2pw_poly_predict.predict_process(
514
+ [text_short, char_index + i]
515
+ )[0]
516
+ py = poly_pinyin[2:-1]
517
+ bopomofos.append(
518
+ pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
519
+ )
520
+ if BLANK_LEVEL == 1:
521
+ bopomofos.append("_")
522
+ elif c in word_pinyin_dict:
523
+ py = word_pinyin_dict[c]
524
+ bopomofos.append(
525
+ pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
526
+ )
527
+ if BLANK_LEVEL == 1:
528
+ bopomofos.append("_")
529
+ else:
530
+ bopomofos.append(c)
531
+ if BLANK_LEVEL == 1:
532
+ bopomofos.append("_")
533
+ if BLANK_LEVEL == 2:
534
+ bopomofos.append("_")
535
+ char_index += len(word)
536
+
537
+ if (
538
+ len(word) == 3
539
+ and bopomofos[0][-1] == "ˇ"
540
+ and bopomofos[1][-1] == "ˇ"
541
+ and bopomofos[-1][-1] == "ˇ"
542
+ ):
543
+ bopomofos[0] = bopomofos[0] + "ˊ"
544
+ bopomofos[1] = bopomofos[1] + "ˊ"
545
+ if len(word) == 2 and bopomofos[0][-1] == "ˇ" and bopomofos[-1][-1] == "ˇ":
546
+ bopomofos[0] = bopomofos[0][:-1] + "ˊ"
547
+ bopomofos = bu_sandhi(word, bopomofos)
548
+ bopomofos = yi_sandhi(word, bopomofos)
549
+ bopomofos = er_sandhi(word, bopomofos)
550
+ if not re.search("[\u4e00-\u9fff]", word):
551
+ text += "|" + word
552
+ continue
553
+ for i in range(len(bopomofos)):
554
+ bopomofos[i] = re.sub(r"([\u3105-\u3129])$", r"\1ˉ", bopomofos[i])
555
+ if text != "":
556
+ text += "|"
557
+ text += "|".join(bopomofos)
558
+ return text
559
+
560
+
561
+ # Convert Latin letters to bopomofo
562
+ def latin_to_bopomofo(text):
563
+ for regex, replacement in _latin_to_bopomofo:
564
+ text = re.sub(regex, replacement, text)
565
+ return text
566
+
567
+
568
+ # Convert pinyin (bopomofo) to IPA
569
+ def bopomofo_to_ipa(text):
570
+ for regex, replacement in _bopomofo_to_ipa:
571
+ text = re.sub(regex, replacement, text)
572
+ return text
573
+
574
+
575
+ def _chinese_to_ipa(text, sentence):
576
+ text = number_to_chinese(text.strip())
577
+ text = normalization(text)
578
+ text = chinese_to_bopomofo(text, sentence)
579
+ # pinyin = bpmf_to_pinyin(text)
580
+ text = latin_to_bopomofo(text)
581
+ text = bopomofo_to_ipa(text)
582
+ text = re.sub("([sɹ]`[⁼ʰ]?)([→↓↑ ]+|$)", r"\1ɹ\2", text)
583
+ text = re.sub("([s][⁼ʰ]?)([→↓↑ ]+|$)", r"\1ɹ\2", text)
584
+ text = re.sub(r"^\||[^\w\s_,\.\?!;:\'…\|→↓↑⁼ʰ`]", "", text)
585
+ text = re.sub(r"([,\.\?!;:\'…])", r"|\1|", text)
586
+ text = re.sub(r"\|+", "|", text)
587
+ text = text.rstrip("|")
588
+ return text
589
+
590
+
591
+ # Convert Chinese to IPA
592
+ def chinese_to_ipa(text, sentence, text_tokenizer):
593
+ # phonemes = text_tokenizer(text.strip())
594
+ if type(text) == str:
595
+ return _chinese_to_ipa(text, sentence)
596
+ else:
597
+ result_ph = []
598
+ for t in text:
599
+ result_ph.append(_chinese_to_ipa(t, sentence))
600
+ return result_ph
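A minimal usage sketch for mandarin.py: importing the module already downloads the Chinese lexicon from the Hub and loads the ONNX polyphone model, so it assumes the relative paths above resolve (i.e. it is run from the repo root) and that the files are available. `chinese_to_ipa` ignores its `text_tokenizer` argument and runs the number_to_chinese → normalization → chinese_to_bopomofo → latin_to_bopomofo → bopomofo_to_ipa pipeline.

# Usage sketch (run from the repo root; the import itself loads lexica and the polyphone model)
from diffrhythm.g2p.g2p.mandarin import chinese_to_ipa

sent = "今年是2024年"
ipa = chinese_to_ipa(sent, sent, text_tokenizer=None)  # text_tokenizer is unused here
print(ipa)  # "|"-separated IPA syllables with tones rendered as →/↑/↓↑/↓ arrows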
diffrhythm/g2p/g2p/text_tokenizers.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import re
7
+ import os
8
+ from typing import List, Pattern, Union
9
+ from phonemizer.utils import list2str, str2list
10
+ from phonemizer.backend import EspeakBackend
11
+ from phonemizer.backend.espeak.language_switch import LanguageSwitch
12
+ from phonemizer.backend.espeak.words_mismatch import WordMismatch
13
+ from phonemizer.punctuation import Punctuation
14
+ # from zer.separator import Separator
15
+ from phonemizer.separator import Separator
16
+
17
+
18
+ class TextTokenizer:
19
+ """Phonemize Text."""
20
+
21
+ def __init__(
22
+ self,
23
+ language="en-us",
24
+ backend="espeak",
25
+ separator=Separator(word="|_|", syllable="-", phone="|"),
26
+ preserve_punctuation=True,
27
+ with_stress: bool = False,
28
+ tie: Union[bool, str] = False,
29
+ language_switch: LanguageSwitch = "remove-flags",
30
+ words_mismatch: WordMismatch = "ignore",
31
+ ) -> None:
32
+ self.preserve_punctuation_marks = ",.?!;:'…"
33
+ self.backend = EspeakBackend(
34
+ language,
35
+ punctuation_marks=self.preserve_punctuation_marks,
36
+ preserve_punctuation=preserve_punctuation,
37
+ with_stress=with_stress,
38
+ tie=tie,
39
+ language_switch=language_switch,
40
+ words_mismatch=words_mismatch,
41
+ )
42
+
43
+ self.separator = separator
44
+
45
+ # convert chinese punctuation to english punctuation
46
+ def convert_chinese_punctuation(self, text: str) -> str:
47
+ text = text.replace(",", ",")
48
+ text = text.replace("。", ".")
49
+ text = text.replace("!", "!")
50
+ text = text.replace("?", "?")
51
+ text = text.replace(";", ";")
52
+ text = text.replace(":", ":")
53
+ text = text.replace("、", ",")
54
+ text = text.replace("‘", "'")
55
+ text = text.replace("’", "'")
56
+ text = text.replace("⋯", "…")
57
+ text = text.replace("···", "…")
58
+ text = text.replace("・・・", "…")
59
+ text = text.replace("...", "…")
60
+ return text
61
+
62
+ def __call__(self, text, strip=True) -> List[str]:
63
+
64
+ text_type = type(text)
65
+ normalized_text = []
66
+ for line in str2list(text):
67
+ line = self.convert_chinese_punctuation(line.strip())
68
+ line = re.sub(r"[^\w\s_,\.\?!;:\'…]", "", line)
69
+ line = re.sub(r"\s*([,\.\?!;:\'…])\s*", r"\1", line)
70
+ line = re.sub(r"\s+", " ", line)
71
+ normalized_text.append(line)
72
+ # print("Normalized test: ", normalized_text[0])
73
+ phonemized = self.backend.phonemize(
74
+ normalized_text, separator=self.separator, strip=strip, njobs=1
75
+ )
76
+ if text_type == str:
77
+ phonemized = re.sub(r"([,\.\?!;:\'…])", r"|\1|", list2str(phonemized))
78
+ phonemized = re.sub(r"\|+", "|", phonemized)
79
+ phonemized = phonemized.rstrip("|")
80
+ else:
81
+ for i in range(len(phonemized)):
82
+ phonemized[i] = re.sub(r"([,\.\?!;:\'…])", r"|\1|", phonemized[i])
83
+ phonemized[i] = re.sub(r"\|+", "|", phonemized[i])
84
+ phonemized[i] = phonemized[i].rstrip("|")
85
+ return phonemized
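A minimal usage sketch for TextTokenizer: it wraps phonemizer's EspeakBackend, so espeak-ng must be available; `__call__` accepts either a single string or a list of strings and returns phonemes joined with the configured separators, with punctuation preserved.

# Usage sketch (requires phonemizer with the espeak-ng backend installed)
from diffrhythm.g2p.g2p.text_tokenizers import TextTokenizer

tk = TextTokenizer(language="en-us")
print(tk("Hello, world!"))                 # single string -> phoneme string with "|" phone separators
print(tk(["first line", "second line"]))   # list input -> list of phoneme strings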
diffrhythm/g2p/g2p/vocab.json ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab": {
3
+ ",": 0,
4
+ ".": 1,
5
+ "?": 2,
6
+ "!": 3,
7
+ "_": 4,
8
+ "iː": 5,
9
+ "ɪ": 6,
10
+ "ɜː": 7,
11
+ "ɚ": 8,
12
+ "oːɹ": 9,
13
+ "ɔː": 10,
14
+ "ɔːɹ": 11,
15
+ "ɑː": 12,
16
+ "uː": 13,
17
+ "ʊ": 14,
18
+ "ɑːɹ": 15,
19
+ "ʌ": 16,
20
+ "ɛ": 17,
21
+ "æ": 18,
22
+ "eɪ": 19,
23
+ "aɪ": 20,
24
+ "ɔɪ": 21,
25
+ "aʊ": 22,
26
+ "oʊ": 23,
27
+ "ɪɹ": 24,
28
+ "ɛɹ": 25,
29
+ "ʊɹ": 26,
30
+ "p": 27,
31
+ "b": 28,
32
+ "t": 29,
33
+ "d": 30,
34
+ "k": 31,
35
+ "ɡ": 32,
36
+ "f": 33,
37
+ "v": 34,
38
+ "θ": 35,
39
+ "ð": 36,
40
+ "s": 37,
41
+ "z": 38,
42
+ "ʃ": 39,
43
+ "ʒ": 40,
44
+ "h": 41,
45
+ "tʃ": 42,
46
+ "dʒ": 43,
47
+ "m": 44,
48
+ "n": 45,
49
+ "ŋ": 46,
50
+ "j": 47,
51
+ "w": 48,
52
+ "ɹ": 49,
53
+ "l": 50,
54
+ "tɹ": 51,
55
+ "dɹ": 52,
56
+ "ts": 53,
57
+ "dz": 54,
58
+ "i": 55,
59
+ "ɔ": 56,
60
+ "ə": 57,
61
+ "ɾ": 58,
62
+ "iə": 59,
63
+ "r": 60,
64
+ "u": 61,
65
+ "oː": 62,
66
+ "ɛː": 63,
67
+ "ɪː": 64,
68
+ "aɪə": 65,
69
+ "aɪɚ": 66,
70
+ "ɑ̃": 67,
71
+ "ç": 68,
72
+ "ɔ̃": 69,
73
+ "ææ": 70,
74
+ "ɐɐ": 71,
75
+ "ɡʲ": 72,
76
+ "nʲ": 73,
77
+ "iːː": 74,
78
+
79
+ "p⁼": 75,
80
+ "pʰ": 76,
81
+ "t⁼": 77,
82
+ "tʰ": 78,
83
+ "k⁼": 79,
84
+ "kʰ": 80,
85
+ "x": 81,
86
+ "tʃ⁼": 82,
87
+ "tʃʰ": 83,
88
+ "ts`⁼": 84,
89
+ "ts`ʰ": 85,
90
+ "s`": 86,
91
+ "ɹ`": 87,
92
+ "ts⁼": 88,
93
+ "tsʰ": 89,
94
+ "p⁼wo": 90,
95
+ "p⁼wo→": 91,
96
+ "p⁼wo↑": 92,
97
+ "p⁼wo↓↑": 93,
98
+ "p⁼wo↓": 94,
99
+ "pʰwo": 95,
100
+ "pʰwo→": 96,
101
+ "pʰwo↑": 97,
102
+ "pʰwo↓↑": 98,
103
+ "pʰwo↓": 99,
104
+ "mwo": 100,
105
+ "mwo→": 101,
106
+ "mwo↑": 102,
107
+ "mwo↓↑": 103,
108
+ "mwo↓": 104,
109
+ "fwo": 105,
110
+ "fwo→": 106,
111
+ "fwo↑": 107,
112
+ "fwo↓↑": 108,
113
+ "fwo↓": 109,
114
+ "jɛn": 110,
115
+ "jɛn→": 111,
116
+ "jɛn↑": 112,
117
+ "jɛn↓↑": 113,
118
+ "jɛn↓": 114,
119
+ "ɥæn": 115,
120
+ "ɥæn→": 116,
121
+ "ɥæn↑": 117,
122
+ "ɥæn↓↑": 118,
123
+ "ɥæn↓": 119,
124
+ "in": 120,
125
+ "in→": 121,
126
+ "in↑": 122,
127
+ "in↓↑": 123,
128
+ "in↓": 124,
129
+ "ɥn": 125,
130
+ "ɥn→": 126,
131
+ "ɥn↑": 127,
132
+ "ɥn↓↑": 128,
133
+ "ɥn↓": 129,
134
+ "iŋ": 130,
135
+ "iŋ→": 131,
136
+ "iŋ↑": 132,
137
+ "iŋ↓↑": 133,
138
+ "iŋ↓": 134,
139
+ "ʊŋ": 135,
140
+ "ʊŋ→": 136,
141
+ "ʊŋ↑": 137,
142
+ "ʊŋ↓↑": 138,
143
+ "ʊŋ↓": 139,
144
+ "jʊŋ": 140,
145
+ "jʊŋ→": 141,
146
+ "jʊŋ↑": 142,
147
+ "jʊŋ↓↑": 143,
148
+ "jʊŋ↓": 144,
149
+ "ia": 145,
150
+ "ia→": 146,
151
+ "ia↑": 147,
152
+ "ia↓↑": 148,
153
+ "ia↓": 149,
154
+ "iɛ": 150,
155
+ "iɛ→": 151,
156
+ "iɛ↑": 152,
157
+ "iɛ↓↑": 153,
158
+ "iɛ↓": 154,
159
+ "iɑʊ": 155,
160
+ "iɑʊ→": 156,
161
+ "iɑʊ↑": 157,
162
+ "iɑʊ↓↑": 158,
163
+ "iɑʊ↓": 159,
164
+ "ioʊ": 160,
165
+ "ioʊ→": 161,
166
+ "ioʊ↑": 162,
167
+ "ioʊ↓↑": 163,
168
+ "ioʊ↓": 164,
169
+ "iɑŋ": 165,
170
+ "iɑŋ→": 166,
171
+ "iɑŋ↑": 167,
172
+ "iɑŋ↓↑": 168,
173
+ "iɑŋ↓": 169,
174
+ "ua": 170,
175
+ "ua→": 171,
176
+ "ua↑": 172,
177
+ "ua↓↑": 173,
178
+ "ua↓": 174,
179
+ "uo": 175,
180
+ "uo→": 176,
181
+ "uo↑": 177,
182
+ "uo↓↑": 178,
183
+ "uo↓": 179,
184
+ "uaɪ": 180,
185
+ "uaɪ→": 181,
186
+ "uaɪ↑": 182,
187
+ "uaɪ↓↑": 183,
188
+ "uaɪ↓": 184,
189
+ "ueɪ": 185,
190
+ "ueɪ→": 186,
191
+ "ueɪ↑": 187,
192
+ "ueɪ↓↑": 188,
193
+ "ueɪ↓": 189,
194
+ "uan": 190,
195
+ "uan→": 191,
196
+ "uan↑": 192,
197
+ "uan↓↑": 193,
198
+ "uan↓": 194,
199
+ "uən": 195,
200
+ "uən→": 196,
201
+ "uən↑": 197,
202
+ "uən↓↑": 198,
203
+ "uən↓": 199,
204
+ "uɑŋ": 200,
205
+ "uɑŋ→": 201,
206
+ "uɑŋ↑": 202,
207
+ "uɑŋ↓↑": 203,
208
+ "uɑŋ↓": 204,
209
+ "ɥɛ": 205,
210
+ "ɥɛ→": 206,
211
+ "ɥɛ↑": 207,
212
+ "ɥɛ↓↑": 208,
213
+ "ɥɛ↓": 209,
214
+ "a": 210,
215
+ "a→": 211,
216
+ "a↑": 212,
217
+ "a↓↑": 213,
218
+ "a↓": 214,
219
+ "o": 215,
220
+ "o→": 216,
221
+ "o↑": 217,
222
+ "o↓↑": 218,
223
+ "o↓": 219,
224
+ "ə→": 220,
225
+ "ə↑": 221,
226
+ "ə↓↑": 222,
227
+ "ə↓": 223,
228
+ "ɛ→": 224,
229
+ "ɛ↑": 225,
230
+ "ɛ↓↑": 226,
231
+ "ɛ↓": 227,
232
+ "aɪ→": 228,
233
+ "aɪ↑": 229,
234
+ "aɪ↓↑": 230,
235
+ "aɪ↓": 231,
236
+ "eɪ→": 232,
237
+ "eɪ↑": 233,
238
+ "eɪ↓↑": 234,
239
+ "eɪ↓": 235,
240
+ "ɑʊ": 236,
241
+ "ɑʊ→": 237,
242
+ "ɑʊ↑": 238,
243
+ "ɑʊ↓↑": 239,
244
+ "ɑʊ↓": 240,
245
+ "oʊ→": 241,
246
+ "oʊ↑": 242,
247
+ "oʊ↓↑": 243,
248
+ "oʊ↓": 244,
249
+ "an": 245,
250
+ "an→": 246,
251
+ "an↑": 247,
252
+ "an↓↑": 248,
253
+ "an↓": 249,
254
+ "ən": 250,
255
+ "ən→": 251,
256
+ "ən↑": 252,
257
+ "ən↓↑": 253,
258
+ "ən↓": 254,
259
+ "ɑŋ": 255,
260
+ "ɑŋ→": 256,
261
+ "ɑŋ↑": 257,
262
+ "ɑŋ↓↑": 258,
263
+ "ɑŋ↓": 259,
264
+ "əŋ": 260,
265
+ "əŋ→": 261,
266
+ "əŋ↑": 262,
267
+ "əŋ↓↑": 263,
268
+ "əŋ↓": 264,
269
+ "əɹ": 265,
270
+ "əɹ→": 266,
271
+ "əɹ↑": 267,
272
+ "əɹ↓↑": 268,
273
+ "əɹ↓": 269,
274
+ "i→": 270,
275
+ "i↑": 271,
276
+ "i↓↑": 272,
277
+ "i↓": 273,
278
+ "u→": 274,
279
+ "u↑": 275,
280
+ "u↓↑": 276,
281
+ "u↓": 277,
282
+ "ɥ": 278,
283
+ "ɥ→": 279,
284
+ "ɥ↑": 280,
285
+ "ɥ↓↑": 281,
286
+ "ɥ↓": 282,
287
+ "ts`⁼ɹ": 283,
288
+ "ts`⁼ɹ→": 284,
289
+ "ts`⁼ɹ↑": 285,
290
+ "ts`⁼ɹ↓↑": 286,
291
+ "ts`⁼ɹ↓": 287,
292
+ "ts`ʰɹ": 288,
293
+ "ts`ʰɹ→": 289,
294
+ "ts`ʰɹ↑": 290,
295
+ "ts`ʰɹ↓↑": 291,
296
+ "ts`ʰɹ↓": 292,
297
+ "s`ɹ": 293,
298
+ "s`ɹ→": 294,
299
+ "s`ɹ↑": 295,
300
+ "s`ɹ↓↑": 296,
301
+ "s`ɹ���": 297,
302
+ "ɹ`ɹ": 298,
303
+ "ɹ`ɹ→": 299,
304
+ "ɹ`ɹ↑": 300,
305
+ "ɹ`ɹ↓↑": 301,
306
+ "ɹ`ɹ↓": 302,
307
+ "ts⁼ɹ": 303,
308
+ "ts⁼ɹ→": 304,
309
+ "ts⁼ɹ↑": 305,
310
+ "ts⁼ɹ↓↑": 306,
311
+ "ts⁼ɹ↓": 307,
312
+ "tsʰɹ": 308,
313
+ "tsʰɹ→": 309,
314
+ "tsʰɹ↑": 310,
315
+ "tsʰɹ↓↑": 311,
316
+ "tsʰɹ↓": 312,
317
+ "sɹ": 313,
318
+ "sɹ→": 314,
319
+ "sɹ↑": 315,
320
+ "sɹ↓↑": 316,
321
+ "sɹ↓": 317,
322
+
323
+ "ɯ": 318,
324
+ "e": 319,
325
+ "aː": 320,
326
+ "ɯː": 321,
327
+ "eː": 322,
328
+ "ç": 323,
329
+ "ɸ": 324,
330
+ "ɰᵝ": 325,
331
+ "ɴ": 326,
332
+ "g": 327,
333
+ "dʑ": 328,
334
+ "q": 329,
335
+ "ː": 330,
336
+ "bj": 331,
337
+ "tɕ": 332,
338
+ "dej": 333,
339
+ "tej": 334,
340
+ "gj": 335,
341
+ "gɯ": 336,
342
+ "çj": 337,
343
+ "kj": 338,
344
+ "kɯ": 339,
345
+ "mj": 340,
346
+ "nj": 341,
347
+ "pj": 342,
348
+ "ɾj": 343,
349
+ "ɕ": 344,
350
+ "tsɯ": 345,
351
+
352
+ "ɐ": 346,
353
+ "ɑ": 347,
354
+ "ɒ": 348,
355
+ "ɜ": 349,
356
+ "ɫ": 350,
357
+ "ʑ": 351,
358
+ "ʲ": 352,
359
+
360
+ "y": 353,
361
+ "ø": 354,
362
+ "œ": 355,
363
+ "ʁ": 356,
364
+ "̃": 357,
365
+ "ɲ": 358,
366
+
367
+ ":": 359,
368
+ ";": 360,
369
+ "'": 361,
370
+ "…": 362
371
+ }
372
+ }
diffrhythm/g2p/g2p_generation.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2024 Amphion.
2
+ #
3
+ # This source code is licensed under the MIT license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ import os
7
+ import sys
8
+
9
+ from diffrhythm.g2p.g2p import PhonemeBpeTokenizer
10
+ from diffrhythm.g2p.utils.g2p import phonemizer_g2p
11
+ import tqdm
12
+ from typing import List
13
+ import json
14
+ import os
15
+ import re
16
+
17
+
18
+ def ph_g2p(text, language):
19
+
20
+ return phonemizer_g2p(text=text, language=language)
21
+
22
+
23
+ def g2p(text, sentence, language):
24
+
25
+ return text_tokenizer.tokenize(text=text, sentence=sentence, language=language)
26
+
27
+
28
+ def is_chinese(char):
29
+ if char >= "\u4e00" and char <= "\u9fa5":
30
+ return True
31
+ else:
32
+ return False
33
+
34
+
35
+ def is_alphabet(char):
36
+ if (char >= "\u0041" and char <= "\u005a") or (
37
+ char >= "\u0061" and char <= "\u007a"
38
+ ):
39
+ return True
40
+ else:
41
+ return False
42
+
43
+
44
+ def is_other(char):
45
+ if not (is_chinese(char) or is_alphabet(char)):
46
+ return True
47
+ else:
48
+ return False
49
+
50
+
51
+ def get_segment(text: str) -> List[tuple]:
52
+ # sentence --> [ch_part, en_part, ch_part, ...]
53
+ segments = []
54
+ types = []
55
+ flag = 0
56
+ temp_seg = ""
57
+ temp_lang = ""
58
+
59
+ # Determine the type of each character: "zh" (Chinese), "en" (Latin alphabet), or "other".
60
+ for i, ch in enumerate(text):
61
+ if is_chinese(ch):
62
+ types.append("zh")
63
+ elif is_alphabet(ch):
64
+ types.append("en")
65
+ else:
66
+ types.append("other")
67
+
68
+ assert len(types) == len(text)
69
+
70
+ for i in range(len(types)):
71
+ # find the first char of the seg
72
+ if flag == 0:
73
+ temp_seg += text[i]
74
+ temp_lang = types[i]
75
+ flag = 1
76
+ else:
77
+ if temp_lang == "other":
78
+ if types[i] == temp_lang:
79
+ temp_seg += text[i]
80
+ else:
81
+ temp_seg += text[i]
82
+ temp_lang = types[i]
83
+ else:
84
+ if types[i] == temp_lang:
85
+ temp_seg += text[i]
86
+ elif types[i] == "other":
87
+ temp_seg += text[i]
88
+ else:
89
+ segments.append((temp_seg, temp_lang))
90
+ temp_seg = text[i]
91
+ temp_lang = types[i]
92
+ flag = 1
93
+
94
+ segments.append((temp_seg, temp_lang))
95
+ return segments
96
+
97
+
98
+ def chn_eng_g2p(text: str):
99
+ # now only en and ch
100
+ segments = get_segment(text)
101
+ all_phoneme = ""
102
+ all_tokens = []
103
+
104
+ for index in range(len(segments)):
105
+ seg = segments[index]
106
+ phoneme, token = g2p(seg[0], text, seg[1])
107
+ all_phoneme += phoneme + "|"
108
+ all_tokens += token
109
+
110
+ if seg[1] == "en" and index == len(segments) - 1 and all_phoneme[-2] == "_":
111
+ all_phoneme = all_phoneme[:-2]
112
+ all_tokens = all_tokens[:-1]
113
+ return all_phoneme, all_tokens
114
+
115
+
116
+ text_tokenizer = PhonemeBpeTokenizer()
117
+ with open("./diffrhythm/g2p/g2p/vocab.json", "r") as f:
118
+ json_data = f.read()
119
+ data = json.loads(json_data)
120
+ vocab = data["vocab"]
121
+
122
+ if __name__ == '__main__':
123
+ phone, token = chn_eng_g2p("你好,hello world")
124
+ phone, token = chn_eng_g2p("你好,hello world, Bonjour, 테스트 해 보겠습니다, 五月雨緑")
125
+ print(phone)
126
+ print(token)
127
+
128
+ #phone, token = text_tokenizer.tokenize("你好,hello world, Bonjour, 테스트 해 보겠습니다, 五月雨緑", "", "auto")
129
+ phone, token = text_tokenizer.tokenize("緑", "", "auto")
130
+ #phone, token = text_tokenizer.tokenize("आइए इसका परीक्षण करें", "", "auto")
131
+ #phone, token = text_tokenizer.tokenize("आइए इसका परीक्षण करें", "", "other")
132
+ print(phone)
133
+ print(token)
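A small sketch of the segmentation step that chn_eng_g2p builds on, assuming the module-level setup above (tokenizer and vocab.json) can run from the repo root: get_segment splits a mixed string into (substring, lang) chunks, with lang one of "zh", "en", or "other", and "other" characters attached to the current chunk.

# Illustration of the segmentation used by chn_eng_g2p
from diffrhythm.g2p.g2p_generation import get_segment

segs = get_segment("你好hello world")
# segs is a list of (substring, lang) tuples; Chinese runs and Latin runs
# land in separate tuples, while punctuation/spaces stick to the current run
print(segs)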
diffrhythm/g2p/sources/bpmf_2_pinyin.txt ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ b ㄅ
2
+ p ㄆ
3
+ m ㄇ
4
+ f ㄈ
5
+ d ㄉ
6
+ t ㄊ
7
+ n ㄋ
8
+ l ㄌ
9
+ g ㄍ
10
+ k ㄎ
11
+ h ㄏ
12
+ j ㄐ
13
+ q ㄑ
14
+ x ㄒ
15
+ zh ㄓ
16
+ ch ㄔ
17
+ sh ㄕ
18
+ r ㄖ
19
+ z ㄗ
20
+ c ㄘ
21
+ s ㄙ
22
+ i ㄧ
23
+ u ㄨ
24
+ v ㄩ
25
+ a ㄚ
26
+ o ㄛ
27
+ e ㄜ
28
+ e ㄝ
29
+ ai ㄞ
30
+ ei ㄟ
31
+ ao ㄠ
32
+ ou ㄡ
33
+ an ㄢ
34
+ en ㄣ
35
+ ang ㄤ
36
+ eng ㄥ
37
+ er ㄦ
38
+ 2 ˊ
39
+ 3 ˇ
40
+ 4 ˋ
41
+ 0 ˙
diffrhythm/g2p/sources/chinese_lexicon.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3a7685d1c3e68eb2fa304bfc63e90c90c3c1a1948839a5b1b507b2131b3e2fb
3
+ size 14779443
diffrhythm/g2p/sources/g2p_chinese_model/config.json ADDED
@@ -0,0 +1,819 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/BERT-POLY-v2/pretrained_models/mini_bert",
3
+ "architectures": [
4
+ "BertPoly"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 384,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3",
18
+ "4": "LABEL_4",
19
+ "5": "LABEL_5",
20
+ "6": "LABEL_6",
21
+ "7": "LABEL_7",
22
+ "8": "LABEL_8",
23
+ "9": "LABEL_9",
24
+ "10": "LABEL_10",
25
+ "11": "LABEL_11",
26
+ "12": "LABEL_12",
27
+ "13": "LABEL_13",
28
+ "14": "LABEL_14",
29
+ "15": "LABEL_15",
30
+ "16": "LABEL_16",
31
+ "17": "LABEL_17",
32
+ "18": "LABEL_18",
33
+ "19": "LABEL_19",
34
+ "20": "LABEL_20",
35
+ "21": "LABEL_21",
36
+ "22": "LABEL_22",
37
+ "23": "LABEL_23",
38
+ "24": "LABEL_24",
39
+ "25": "LABEL_25",
40
+ "26": "LABEL_26",
41
+ "27": "LABEL_27",
42
+ "28": "LABEL_28",
43
+ "29": "LABEL_29",
44
+ "30": "LABEL_30",
45
+ "31": "LABEL_31",
46
+ "32": "LABEL_32",
47
+ "33": "LABEL_33",
48
+ "34": "LABEL_34",
49
+ "35": "LABEL_35",
50
+ "36": "LABEL_36",
51
+ "37": "LABEL_37",
52
+ "38": "LABEL_38",
53
+ "39": "LABEL_39",
54
+ "40": "LABEL_40",
55
+ "41": "LABEL_41",
56
+ "42": "LABEL_42",
57
+ "43": "LABEL_43",
58
+ "44": "LABEL_44",
59
+ "45": "LABEL_45",
60
+ "46": "LABEL_46",
61
+ "47": "LABEL_47",
62
+ "48": "LABEL_48",
63
+ "49": "LABEL_49",
64
+ "50": "LABEL_50",
65
+ "51": "LABEL_51",
66
+ "52": "LABEL_52",
67
+ "53": "LABEL_53",
68
+ "54": "LABEL_54",
69
+ "55": "LABEL_55",
70
+ "56": "LABEL_56",
71
+ "57": "LABEL_57",
72
+ "58": "LABEL_58",
73
+ "59": "LABEL_59",
74
+ "60": "LABEL_60",
75
+ "61": "LABEL_61",
76
+ "62": "LABEL_62",
77
+ "63": "LABEL_63",
78
+ "64": "LABEL_64",
79
+ "65": "LABEL_65",
80
+ "66": "LABEL_66",
81
+ "67": "LABEL_67",
82
+ "68": "LABEL_68",
83
+ "69": "LABEL_69",
84
+ "70": "LABEL_70",
85
+ "71": "LABEL_71",
86
+ "72": "LABEL_72",
87
+ "73": "LABEL_73",
88
+ "74": "LABEL_74",
89
+ "75": "LABEL_75",
90
+ "76": "LABEL_76",
91
+ "77": "LABEL_77",
92
+ "78": "LABEL_78",
93
+ "79": "LABEL_79",
94
+ "80": "LABEL_80",
95
+ "81": "LABEL_81",
96
+ "82": "LABEL_82",
97
+ "83": "LABEL_83",
98
+ "84": "LABEL_84",
99
+ "85": "LABEL_85",
100
+ "86": "LABEL_86",
101
+ "87": "LABEL_87",
102
+ "88": "LABEL_88",
103
+ "89": "LABEL_89",
104
+ "90": "LABEL_90",
105
+ "91": "LABEL_91",
106
+ "92": "LABEL_92",
107
+ "93": "LABEL_93",
108
+ "94": "LABEL_94",
109
+ "95": "LABEL_95",
110
+ "96": "LABEL_96",
111
+ "97": "LABEL_97",
112
+ "98": "LABEL_98",
113
+ "99": "LABEL_99",
114
+ "100": "LABEL_100",
115
+ "101": "LABEL_101",
116
+ "102": "LABEL_102",
117
+ "103": "LABEL_103",
118
+ "104": "LABEL_104",
119
+ "105": "LABEL_105",
120
+ "106": "LABEL_106",
121
+ "107": "LABEL_107",
122
+ "108": "LABEL_108",
123
+ "109": "LABEL_109",
124
+ "110": "LABEL_110",
125
+ "111": "LABEL_111",
126
+ "112": "LABEL_112",
127
+ "113": "LABEL_113",
128
+ "114": "LABEL_114",
129
+ "115": "LABEL_115",
130
+ "116": "LABEL_116",
131
+ "117": "LABEL_117",
132
+ "118": "LABEL_118",
133
+ "119": "LABEL_119",
134
+ "120": "LABEL_120",
135
+ "121": "LABEL_121",
136
+ "122": "LABEL_122",
137
+ "123": "LABEL_123",
138
+ "124": "LABEL_124",
139
+ "125": "LABEL_125",
140
+ "126": "LABEL_126",
141
+ "127": "LABEL_127",
142
+ "128": "LABEL_128",
143
+ "129": "LABEL_129",
144
+ "130": "LABEL_130",
145
+ "131": "LABEL_131",
146
+ "132": "LABEL_132",
147
+ "133": "LABEL_133",
148
+ "134": "LABEL_134",
149
+ "135": "LABEL_135",
150
+ "136": "LABEL_136",
151
+ "137": "LABEL_137",
152
+ "138": "LABEL_138",
153
+ "139": "LABEL_139",
154
+ "140": "LABEL_140",
155
+ "141": "LABEL_141",
156
+ "142": "LABEL_142",
157
+ "143": "LABEL_143",
158
+ "144": "LABEL_144",
159
+ "145": "LABEL_145",
160
+ "146": "LABEL_146",
161
+ "147": "LABEL_147",
162
+ "148": "LABEL_148",
163
+ "149": "LABEL_149",
164
+ "150": "LABEL_150",
165
+ "151": "LABEL_151",
166
+ "152": "LABEL_152",
167
+ "153": "LABEL_153",
168
+ "154": "LABEL_154",
169
+ "155": "LABEL_155",
170
+ "156": "LABEL_156",
171
+ "157": "LABEL_157",
172
+ "158": "LABEL_158",
173
+ "159": "LABEL_159",
174
+ "160": "LABEL_160",
175
+ "161": "LABEL_161",
176
+ "162": "LABEL_162",
177
+ "163": "LABEL_163",
178
+ "164": "LABEL_164",
179
+ "165": "LABEL_165",
180
+ "166": "LABEL_166",
181
+ "167": "LABEL_167",
182
+ "168": "LABEL_168",
183
+ "169": "LABEL_169",
184
+ "170": "LABEL_170",
185
+ "171": "LABEL_171",
186
+ "172": "LABEL_172",
187
+ "173": "LABEL_173",
188
+ "174": "LABEL_174",
189
+ "175": "LABEL_175",
190
+ "176": "LABEL_176",
191
+ "177": "LABEL_177",
192
+ "178": "LABEL_178",
193
+ "179": "LABEL_179",
194
+ "180": "LABEL_180",
195
+ "181": "LABEL_181",
196
+ "182": "LABEL_182",
197
+ "183": "LABEL_183",
198
+ "184": "LABEL_184",
199
+ "185": "LABEL_185",
200
+ "186": "LABEL_186",
201
+ "187": "LABEL_187",
202
+ "188": "LABEL_188",
203
+ "189": "LABEL_189",
204
+ "190": "LABEL_190",
205
+ "191": "LABEL_191",
206
+ "192": "LABEL_192",
207
+ "193": "LABEL_193",
208
+ "194": "LABEL_194",
209
+ "195": "LABEL_195",
210
+ "196": "LABEL_196",
211
+ "197": "LABEL_197",
212
+ "198": "LABEL_198",
213
+ "199": "LABEL_199",
214
+ "200": "LABEL_200",
215
+ "201": "LABEL_201",
216
+ "202": "LABEL_202",
217
+ "203": "LABEL_203",
218
+ "204": "LABEL_204",
219
+ "205": "LABEL_205",
220
+ "206": "LABEL_206",
221
+ "207": "LABEL_207",
222
+ "208": "LABEL_208",
223
+ "209": "LABEL_209",
224
+ "210": "LABEL_210",
225
+ "211": "LABEL_211",
226
+ "212": "LABEL_212",
227
+ "213": "LABEL_213",
228
+ "214": "LABEL_214",
229
+ "215": "LABEL_215",
230
+ "216": "LABEL_216",
231
+ "217": "LABEL_217",
232
+ "218": "LABEL_218",
233
+ "219": "LABEL_219",
234
+ "220": "LABEL_220",
235
+ "221": "LABEL_221",
236
+ "222": "LABEL_222",
237
+ "223": "LABEL_223",
238
+ "224": "LABEL_224",
239
+ "225": "LABEL_225",
240
+ "226": "LABEL_226",
241
+ "227": "LABEL_227",
242
+ "228": "LABEL_228",
243
+ "229": "LABEL_229",
244
+ "230": "LABEL_230",
245
+ "231": "LABEL_231",
246
+ "232": "LABEL_232",
247
+ "233": "LABEL_233",
248
+ "234": "LABEL_234",
249
+ "235": "LABEL_235",
250
+ "236": "LABEL_236",
251
+ "237": "LABEL_237",
252
+ "238": "LABEL_238",
253
+ "239": "LABEL_239",
254
+ "240": "LABEL_240",
255
+ "241": "LABEL_241",
256
+ "242": "LABEL_242",
257
+ "243": "LABEL_243",
258
+ "244": "LABEL_244",
259
+ "245": "LABEL_245",
260
+ "246": "LABEL_246",
261
+ "247": "LABEL_247",
262
+ "248": "LABEL_248",
263
+ "249": "LABEL_249",
264
+ "250": "LABEL_250",
265
+ "251": "LABEL_251",
266
+ "252": "LABEL_252",
267
+ "253": "LABEL_253",
268
+ "254": "LABEL_254",
269
+ "255": "LABEL_255",
270
+ "256": "LABEL_256",
271
+ "257": "LABEL_257",
272
+ "258": "LABEL_258",
273
+ "259": "LABEL_259",
274
+ "260": "LABEL_260",
275
+ "261": "LABEL_261",
276
+ "262": "LABEL_262",
277
+ "263": "LABEL_263",
278
+ "264": "LABEL_264",
279
+ "265": "LABEL_265",
280
+ "266": "LABEL_266",
281
+ "267": "LABEL_267",
282
+ "268": "LABEL_268",
283
+ "269": "LABEL_269",
284
+ "270": "LABEL_270",
285
+ "271": "LABEL_271",
286
+ "272": "LABEL_272",
287
+ "273": "LABEL_273",
288
+ "274": "LABEL_274",
289
+ "275": "LABEL_275",
290
+ "276": "LABEL_276",
291
+ "277": "LABEL_277",
292
+ "278": "LABEL_278",
293
+ "279": "LABEL_279",
294
+ "280": "LABEL_280",
295
+ "281": "LABEL_281",
296
+ "282": "LABEL_282",
297
+ "283": "LABEL_283",
298
+ "284": "LABEL_284",
299
+ "285": "LABEL_285",
300
+ "286": "LABEL_286",
301
+ "287": "LABEL_287",
302
+ "288": "LABEL_288",
303
+ "289": "LABEL_289",
304
+ "290": "LABEL_290",
305
+ "291": "LABEL_291",
306
+ "292": "LABEL_292",
307
+ "293": "LABEL_293",
308
+ "294": "LABEL_294",
309
+ "295": "LABEL_295",
310
+ "296": "LABEL_296",
311
+ "297": "LABEL_297",
312
+ "298": "LABEL_298",
313
+ "299": "LABEL_299",
314
+ "300": "LABEL_300",
315
+ "301": "LABEL_301",
316
+ "302": "LABEL_302",
317
+ "303": "LABEL_303",
318
+ "304": "LABEL_304",
319
+ "305": "LABEL_305",
320
+ "306": "LABEL_306",
321
+ "307": "LABEL_307",
322
+ "308": "LABEL_308",
323
+ "309": "LABEL_309",
324
+ "310": "LABEL_310",
325
+ "311": "LABEL_311",
326
+ "312": "LABEL_312",
327
+ "313": "LABEL_313",
328
+ "314": "LABEL_314",
329
+ "315": "LABEL_315",
330
+ "316": "LABEL_316",
331
+ "317": "LABEL_317",
332
+ "318": "LABEL_318",
333
+ "319": "LABEL_319",
334
+ "320": "LABEL_320",
335
+ "321": "LABEL_321",
336
+ "322": "LABEL_322",
337
+ "323": "LABEL_323",
338
+ "324": "LABEL_324",
339
+ "325": "LABEL_325",
340
+ "326": "LABEL_326",
341
+ "327": "LABEL_327",
342
+ "328": "LABEL_328",
343
+ "329": "LABEL_329",
344
+ "330": "LABEL_330",
345
+ "331": "LABEL_331",
346
+ "332": "LABEL_332",
347
+ "333": "LABEL_333",
348
+ "334": "LABEL_334",
349
+ "335": "LABEL_335",
350
+ "336": "LABEL_336",
351
+ "337": "LABEL_337",
352
+ "338": "LABEL_338",
353
+ "339": "LABEL_339",
354
+ "340": "LABEL_340",
355
+ "341": "LABEL_341",
356
+ "342": "LABEL_342",
357
+ "343": "LABEL_343",
358
+ "344": "LABEL_344",
359
+ "345": "LABEL_345",
360
+ "346": "LABEL_346",
361
+ "347": "LABEL_347",
362
+ "348": "LABEL_348",
363
+ "349": "LABEL_349",
364
+ "350": "LABEL_350",
365
+ "351": "LABEL_351",
366
+ "352": "LABEL_352",
367
+ "353": "LABEL_353",
368
+ "354": "LABEL_354",
369
+ "355": "LABEL_355",
370
+ "356": "LABEL_356",
371
+ "357": "LABEL_357",
372
+ "358": "LABEL_358",
373
+ "359": "LABEL_359",
374
+ "360": "LABEL_360",
375
+ "361": "LABEL_361",
376
+ "362": "LABEL_362",
377
+ "363": "LABEL_363",
378
+ "364": "LABEL_364",
379
+ "365": "LABEL_365",
380
+ "366": "LABEL_366",
381
+ "367": "LABEL_367",
382
+ "368": "LABEL_368",
383
+ "369": "LABEL_369",
384
+ "370": "LABEL_370",
385
+ "371": "LABEL_371",
386
+ "372": "LABEL_372",
387
+ "373": "LABEL_373",
388
+ "374": "LABEL_374",
389
+ "375": "LABEL_375",
390
+ "376": "LABEL_376",
391
+ "377": "LABEL_377",
392
+ "378": "LABEL_378",
393
+ "379": "LABEL_379",
394
+ "380": "LABEL_380",
395
+ "381": "LABEL_381",
396
+ "382": "LABEL_382",
397
+ "383": "LABEL_383",
398
+ "384": "LABEL_384",
399
+ "385": "LABEL_385",
400
+ "386": "LABEL_386",
401
+ "387": "LABEL_387",
402
+ "388": "LABEL_388",
403
+ "389": "LABEL_389",
404
+ "390": "LABEL_390"
405
+ },
406
+ "initializer_range": 0.02,
407
+ "intermediate_size": 1536,
408
+ "label2id": {
409
+ "LABEL_0": 0,
410
+ "LABEL_1": 1,
411
+ "LABEL_10": 10,
412
+ "LABEL_100": 100,
413
+ "LABEL_101": 101,
414
+ "LABEL_102": 102,
415
+ "LABEL_103": 103,
416
+ "LABEL_104": 104,
417
+ "LABEL_105": 105,
418
+ "LABEL_106": 106,
419
+ "LABEL_107": 107,
420
+ "LABEL_108": 108,
421
+ "LABEL_109": 109,
422
+ "LABEL_11": 11,
423
+ "LABEL_110": 110,
424
+ "LABEL_111": 111,
425
+ "LABEL_112": 112,
426
+ "LABEL_113": 113,
427
+ "LABEL_114": 114,
428
+ "LABEL_115": 115,
429
+ "LABEL_116": 116,
430
+ "LABEL_117": 117,
431
+ "LABEL_118": 118,
432
+ "LABEL_119": 119,
433
+ "LABEL_12": 12,
434
+ "LABEL_120": 120,
435
+ "LABEL_121": 121,
436
+ "LABEL_122": 122,
437
+ "LABEL_123": 123,
438
+ "LABEL_124": 124,
439
+ "LABEL_125": 125,
440
+ "LABEL_126": 126,
441
+ "LABEL_127": 127,
442
+ "LABEL_128": 128,
443
+ "LABEL_129": 129,
444
+ "LABEL_13": 13,
445
+ "LABEL_130": 130,
446
+ "LABEL_131": 131,
447
+ "LABEL_132": 132,
448
+ "LABEL_133": 133,
449
+ "LABEL_134": 134,
450
+ "LABEL_135": 135,
451
+ "LABEL_136": 136,
452
+ "LABEL_137": 137,
453
+ "LABEL_138": 138,
454
+ "LABEL_139": 139,
455
+ "LABEL_14": 14,
456
+ "LABEL_140": 140,
457
+ "LABEL_141": 141,
458
+ "LABEL_142": 142,
459
+ "LABEL_143": 143,
460
+ "LABEL_144": 144,
461
+ "LABEL_145": 145,
462
+ "LABEL_146": 146,
463
+ "LABEL_147": 147,
464
+ "LABEL_148": 148,
465
+ "LABEL_149": 149,
466
+ "LABEL_15": 15,
467
+ "LABEL_150": 150,
468
+ "LABEL_151": 151,
469
+ "LABEL_152": 152,
470
+ "LABEL_153": 153,
471
+ "LABEL_154": 154,
472
+ "LABEL_155": 155,
473
+ "LABEL_156": 156,
474
+ "LABEL_157": 157,
475
+ "LABEL_158": 158,
476
+ "LABEL_159": 159,
477
+ "LABEL_16": 16,
478
+ "LABEL_160": 160,
479
+ "LABEL_161": 161,
480
+ "LABEL_162": 162,
481
+ "LABEL_163": 163,
482
+ "LABEL_164": 164,
483
+ "LABEL_165": 165,
484
+ "LABEL_166": 166,
485
+ "LABEL_167": 167,
486
+ "LABEL_168": 168,
487
+ "LABEL_169": 169,
488
+ "LABEL_17": 17,
489
+ "LABEL_170": 170,
490
+ "LABEL_171": 171,
491
+ "LABEL_172": 172,
492
+ "LABEL_173": 173,
493
+ "LABEL_174": 174,
494
+ "LABEL_175": 175,
495
+ "LABEL_176": 176,
496
+ "LABEL_177": 177,
497
+ "LABEL_178": 178,
498
+ "LABEL_179": 179,
499
+ "LABEL_18": 18,
500
+ "LABEL_180": 180,
501
+ "LABEL_181": 181,
502
+ "LABEL_182": 182,
503
+ "LABEL_183": 183,
504
+ "LABEL_184": 184,
505
+ "LABEL_185": 185,
506
+ "LABEL_186": 186,
507
+ "LABEL_187": 187,
508
+ "LABEL_188": 188,
509
+ "LABEL_189": 189,
510
+ "LABEL_19": 19,
511
+ "LABEL_190": 190,
512
+ "LABEL_191": 191,
513
+ "LABEL_192": 192,
514
+ "LABEL_193": 193,
515
+ "LABEL_194": 194,
516
+ "LABEL_195": 195,
517
+ "LABEL_196": 196,
518
+ "LABEL_197": 197,
519
+ "LABEL_198": 198,
520
+ "LABEL_199": 199,
521
+ "LABEL_2": 2,
522
+ "LABEL_20": 20,
523
+ "LABEL_200": 200,
524
+ "LABEL_201": 201,
525
+ "LABEL_202": 202,
526
+ "LABEL_203": 203,
527
+ "LABEL_204": 204,
528
+ "LABEL_205": 205,
529
+ "LABEL_206": 206,
530
+ "LABEL_207": 207,
531
+ "LABEL_208": 208,
532
+ "LABEL_209": 209,
533
+ "LABEL_21": 21,
534
+ "LABEL_210": 210,
535
+ "LABEL_211": 211,
536
+ "LABEL_212": 212,
537
+ "LABEL_213": 213,
538
+ "LABEL_214": 214,
539
+ "LABEL_215": 215,
540
+ "LABEL_216": 216,
541
+ "LABEL_217": 217,
542
+ "LABEL_218": 218,
543
+ "LABEL_219": 219,
544
+ "LABEL_22": 22,
545
+ "LABEL_220": 220,
546
+ "LABEL_221": 221,
547
+ "LABEL_222": 222,
548
+ "LABEL_223": 223,
549
+ "LABEL_224": 224,
550
+ "LABEL_225": 225,
551
+ "LABEL_226": 226,
552
+ "LABEL_227": 227,
553
+ "LABEL_228": 228,
554
+ "LABEL_229": 229,
555
+ "LABEL_23": 23,
556
+ "LABEL_230": 230,
557
+ "LABEL_231": 231,
558
+ "LABEL_232": 232,
559
+ "LABEL_233": 233,
560
+ "LABEL_234": 234,
561
+ "LABEL_235": 235,
562
+ "LABEL_236": 236,
563
+ "LABEL_237": 237,
564
+ "LABEL_238": 238,
565
+ "LABEL_239": 239,
566
+ "LABEL_24": 24,
567
+ "LABEL_240": 240,
568
+ "LABEL_241": 241,
569
+ "LABEL_242": 242,
570
+ "LABEL_243": 243,
571
+ "LABEL_244": 244,
572
+ "LABEL_245": 245,
573
+ "LABEL_246": 246,
574
+ "LABEL_247": 247,
575
+ "LABEL_248": 248,
576
+ "LABEL_249": 249,
577
+ "LABEL_25": 25,
578
+ "LABEL_250": 250,
579
+ "LABEL_251": 251,
580
+ "LABEL_252": 252,
581
+ "LABEL_253": 253,
582
+ "LABEL_254": 254,
583
+ "LABEL_255": 255,
584
+ "LABEL_256": 256,
585
+ "LABEL_257": 257,
586
+ "LABEL_258": 258,
587
+ "LABEL_259": 259,
588
+ "LABEL_26": 26,
589
+ "LABEL_260": 260,
590
+ "LABEL_261": 261,
591
+ "LABEL_262": 262,
592
+ "LABEL_263": 263,
593
+ "LABEL_264": 264,
594
+ "LABEL_265": 265,
595
+ "LABEL_266": 266,
596
+ "LABEL_267": 267,
597
+ "LABEL_268": 268,
598
+ "LABEL_269": 269,
599
+ "LABEL_27": 27,
600
+ "LABEL_270": 270,
601
+ "LABEL_271": 271,
602
+ "LABEL_272": 272,
603
+ "LABEL_273": 273,
604
+ "LABEL_274": 274,
605
+ "LABEL_275": 275,
606
+ "LABEL_276": 276,
607
+ "LABEL_277": 277,
608
+ "LABEL_278": 278,
609
+ "LABEL_279": 279,
610
+ "LABEL_28": 28,
611
+ "LABEL_280": 280,
612
+ "LABEL_281": 281,
613
+ "LABEL_282": 282,
614
+ "LABEL_283": 283,
615
+ "LABEL_284": 284,
616
+ "LABEL_285": 285,
617
+ "LABEL_286": 286,
618
+ "LABEL_287": 287,
619
+ "LABEL_288": 288,
620
+ "LABEL_289": 289,
621
+ "LABEL_29": 29,
622
+ "LABEL_290": 290,
623
+ "LABEL_291": 291,
624
+ "LABEL_292": 292,
625
+ "LABEL_293": 293,
626
+ "LABEL_294": 294,
627
+ "LABEL_295": 295,
628
+ "LABEL_296": 296,
629
+ "LABEL_297": 297,
630
+ "LABEL_298": 298,
631
+ "LABEL_299": 299,
632
+ "LABEL_3": 3,
633
+ "LABEL_30": 30,
634
+ "LABEL_300": 300,
635
+ "LABEL_301": 301,
636
+ "LABEL_302": 302,
637
+ "LABEL_303": 303,
638
+ "LABEL_304": 304,
639
+ "LABEL_305": 305,
640
+ "LABEL_306": 306,
641
+ "LABEL_307": 307,
642
+ "LABEL_308": 308,
643
+ "LABEL_309": 309,
644
+ "LABEL_31": 31,
645
+ "LABEL_310": 310,
646
+ "LABEL_311": 311,
647
+ "LABEL_312": 312,
648
+ "LABEL_313": 313,
649
+ "LABEL_314": 314,
650
+ "LABEL_315": 315,
651
+ "LABEL_316": 316,
652
+ "LABEL_317": 317,
653
+ "LABEL_318": 318,
654
+ "LABEL_319": 319,
655
+ "LABEL_32": 32,
656
+ "LABEL_320": 320,
657
+ "LABEL_321": 321,
658
+ "LABEL_322": 322,
659
+ "LABEL_323": 323,
660
+ "LABEL_324": 324,
661
+ "LABEL_325": 325,
662
+ "LABEL_326": 326,
663
+ "LABEL_327": 327,
664
+ "LABEL_328": 328,
665
+ "LABEL_329": 329,
666
+ "LABEL_33": 33,
667
+ "LABEL_330": 330,
668
+ "LABEL_331": 331,
669
+ "LABEL_332": 332,
670
+ "LABEL_333": 333,
671
+ "LABEL_334": 334,
672
+ "LABEL_335": 335,
673
+ "LABEL_336": 336,
674
+ "LABEL_337": 337,
675
+ "LABEL_338": 338,
676
+ "LABEL_339": 339,
677
+ "LABEL_34": 34,
678
+ "LABEL_340": 340,
679
+ "LABEL_341": 341,
680
+ "LABEL_342": 342,
681
+ "LABEL_343": 343,
682
+ "LABEL_344": 344,
683
+ "LABEL_345": 345,
684
+ "LABEL_346": 346,
685
+ "LABEL_347": 347,
686
+ "LABEL_348": 348,
687
+ "LABEL_349": 349,
688
+ "LABEL_35": 35,
689
+ "LABEL_350": 350,
690
+ "LABEL_351": 351,
691
+ "LABEL_352": 352,
692
+ "LABEL_353": 353,
693
+ "LABEL_354": 354,
694
+ "LABEL_355": 355,
695
+ "LABEL_356": 356,
696
+ "LABEL_357": 357,
697
+ "LABEL_358": 358,
698
+ "LABEL_359": 359,
699
+ "LABEL_36": 36,
700
+ "LABEL_360": 360,
701
+ "LABEL_361": 361,
702
+ "LABEL_362": 362,
703
+ "LABEL_363": 363,
704
+ "LABEL_364": 364,
705
+ "LABEL_365": 365,
706
+ "LABEL_366": 366,
707
+ "LABEL_367": 367,
708
+ "LABEL_368": 368,
709
+ "LABEL_369": 369,
710
+ "LABEL_37": 37,
711
+ "LABEL_370": 370,
712
+ "LABEL_371": 371,
713
+ "LABEL_372": 372,
714
+ "LABEL_373": 373,
715
+ "LABEL_374": 374,
716
+ "LABEL_375": 375,
717
+ "LABEL_376": 376,
718
+ "LABEL_377": 377,
719
+ "LABEL_378": 378,
720
+ "LABEL_379": 379,
721
+ "LABEL_38": 38,
722
+ "LABEL_380": 380,
723
+ "LABEL_381": 381,
724
+ "LABEL_382": 382,
725
+ "LABEL_383": 383,
726
+ "LABEL_384": 384,
727
+ "LABEL_385": 385,
728
+ "LABEL_386": 386,
729
+ "LABEL_387": 387,
730
+ "LABEL_388": 388,
731
+ "LABEL_389": 389,
732
+ "LABEL_39": 39,
733
+ "LABEL_390": 390,
734
+ "LABEL_4": 4,
735
+ "LABEL_40": 40,
736
+ "LABEL_41": 41,
737
+ "LABEL_42": 42,
738
+ "LABEL_43": 43,
739
+ "LABEL_44": 44,
740
+ "LABEL_45": 45,
741
+ "LABEL_46": 46,
742
+ "LABEL_47": 47,
743
+ "LABEL_48": 48,
744
+ "LABEL_49": 49,
745
+ "LABEL_5": 5,
746
+ "LABEL_50": 50,
747
+ "LABEL_51": 51,
748
+ "LABEL_52": 52,
749
+ "LABEL_53": 53,
750
+ "LABEL_54": 54,
751
+ "LABEL_55": 55,
752
+ "LABEL_56": 56,
753
+ "LABEL_57": 57,
754
+ "LABEL_58": 58,
755
+ "LABEL_59": 59,
756
+ "LABEL_6": 6,
757
+ "LABEL_60": 60,
758
+ "LABEL_61": 61,
759
+ "LABEL_62": 62,
760
+ "LABEL_63": 63,
761
+ "LABEL_64": 64,
762
+ "LABEL_65": 65,
763
+ "LABEL_66": 66,
764
+ "LABEL_67": 67,
765
+ "LABEL_68": 68,
766
+ "LABEL_69": 69,
767
+ "LABEL_7": 7,
768
+ "LABEL_70": 70,
769
+ "LABEL_71": 71,
770
+ "LABEL_72": 72,
771
+ "LABEL_73": 73,
772
+ "LABEL_74": 74,
773
+ "LABEL_75": 75,
774
+ "LABEL_76": 76,
775
+ "LABEL_77": 77,
776
+ "LABEL_78": 78,
777
+ "LABEL_79": 79,
778
+ "LABEL_8": 8,
779
+ "LABEL_80": 80,
780
+ "LABEL_81": 81,
781
+ "LABEL_82": 82,
782
+ "LABEL_83": 83,
783
+ "LABEL_84": 84,
784
+ "LABEL_85": 85,
785
+ "LABEL_86": 86,
786
+ "LABEL_87": 87,
787
+ "LABEL_88": 88,
788
+ "LABEL_89": 89,
789
+ "LABEL_9": 9,
790
+ "LABEL_90": 90,
791
+ "LABEL_91": 91,
792
+ "LABEL_92": 92,
793
+ "LABEL_93": 93,
794
+ "LABEL_94": 94,
795
+ "LABEL_95": 95,
796
+ "LABEL_96": 96,
797
+ "LABEL_97": 97,
798
+ "LABEL_98": 98,
799
+ "LABEL_99": 99
800
+ },
801
+ "layer_norm_eps": 1e-12,
802
+ "max_position_embeddings": 512,
803
+ "model_type": "bert",
804
+ "num_attention_heads": 12,
805
+ "num_hidden_layers": 6,
806
+ "num_relation_heads": 32,
807
+ "pad_token_id": 0,
808
+ "pooler_fc_size": 768,
809
+ "pooler_num_attention_heads": 12,
810
+ "pooler_num_fc_layers": 3,
811
+ "pooler_size_per_head": 128,
812
+ "pooler_type": "first_token_transform",
813
+ "position_embedding_type": "absolute",
814
+ "torch_dtype": "float32",
815
+ "transformers_version": "4.44.1",
816
+ "type_vocab_size": 2,
817
+ "use_cache": true,
818
+ "vocab_size": 21128
819
+ }
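The tail of config.json above is the transformers-style export config for the polyphone classifier: a 6-layer Chinese BERT (12 attention heads, vocab_size 21128) whose label2id table carries one LABEL_* entry per pronunciation class (the classes themselves are listed in polydict.json further down). A minimal inspection sketch follows; reading the files from diffrhythm/g2p/sources/g2p_chinese_model/ and serving the exported model with onnxruntime are assumptions made for illustration, not something this diff states.

```python
# Minimal sketch: inspect the polyphone-classifier config and ONNX export.
# Assumptions: files are read relative to the repo root, and the .onnx model
# is consumed with onnxruntime (the input names depend on how it was exported).
import json
import onnxruntime as ort

MODEL_DIR = "diffrhythm/g2p/sources/g2p_chinese_model"

with open(f"{MODEL_DIR}/config.json", encoding="utf-8") as f:
    cfg = json.load(f)

print(cfg["model_type"], cfg["num_hidden_layers"], cfg["vocab_size"])  # bert 6 21128
print(len(cfg["label2id"]))  # number of LABEL_* pronunciation classes

sess = ort.InferenceSession(f"{MODEL_DIR}/poly_bert_model.onnx")
print([inp.name for inp in sess.get_inputs()])  # expected input tensors
```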
diffrhythm/g2p/sources/g2p_chinese_model/poly_bert_model.onnx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8765d835ffdf9811c832d4dc7b6a552757aa8615c01d1184db716a50c20aebbc
3
+ size 76583333
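The .onnx weights themselves are stored via Git LFS, so what is committed here is only the pointer file (spec version, SHA-256 oid, byte size). After cloning with LFS enabled, the downloaded file can be checked against this pointer; the sketch below is a generic stdlib check, not a utility that ships with this repo.

```python
# Verify a Git LFS-downloaded file against the oid/size recorded in its pointer.
import hashlib
import os

path = "diffrhythm/g2p/sources/g2p_chinese_model/poly_bert_model.onnx"
expected_oid = "8765d835ffdf9811c832d4dc7b6a552757aa8615c01d1184db716a50c20aebbc"
expected_size = 76583333

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch: still an LFS pointer?"
assert sha256.hexdigest() == expected_oid, "sha256 mismatch"
print("poly_bert_model.onnx matches its LFS pointer")
```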
diffrhythm/g2p/sources/g2p_chinese_model/polychar.txt ADDED
@@ -0,0 +1,159 @@
1
+
2
+
3
+
4
+
5
+
6
+
7
+
8
+
9
+
10
+
11
+
12
+
13
+
14
+
15
+ 便
16
+
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
53
+ 宿
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
diffrhythm/g2p/sources/g2p_chinese_model/polydict.json ADDED
@@ -0,0 +1,393 @@
1
+ {
2
+ "1": "丧{sang1}",
3
+ "2": "丧{sang4}",
4
+ "3": "中{zhong1}",
5
+ "4": "中{zhong4}",
6
+ "5": "为{wei2}",
7
+ "6": "为{wei4}",
8
+ "7": "乌{wu1}",
9
+ "8": "乌{wu4}",
10
+ "9": "乐{lao4}",
11
+ "10": "乐{le4}",
12
+ "11": "乐{le5}",
13
+ "12": "乐{yao4}",
14
+ "13": "乐{yve4}",
15
+ "14": "了{le5}",
16
+ "15": "了{liao3}",
17
+ "16": "了{liao5}",
18
+ "17": "什{shen2}",
19
+ "18": "什{shi2}",
20
+ "19": "仔{zai3}",
21
+ "20": "仔{zai5}",
22
+ "21": "仔{zi3}",
23
+ "22": "仔{zi5}",
24
+ "23": "令{ling2}",
25
+ "24": "令{ling4}",
26
+ "25": "任{ren2}",
27
+ "26": "任{ren4}",
28
+ "27": "会{hui4}",
29
+ "28": "会{hui5}",
30
+ "29": "会{kuai4}",
31
+ "30": "传{chuan2}",
32
+ "31": "传{zhuan4}",
33
+ "32": "佛{fo2}",
34
+ "33": "佛{fu2}",
35
+ "34": "供{gong1}",
36
+ "35": "供{gong4}",
37
+ "36": "便{bian4}",
38
+ "37": "便{pian2}",
39
+ "38": "倒{dao3}",
40
+ "39": "倒{dao4}",
41
+ "40": "假{jia3}",
42
+ "41": "假{jia4}",
43
+ "42": "兴{xing1}",
44
+ "43": "兴{xing4}",
45
+ "44": "冠{guan1}",
46
+ "45": "冠{guan4}",
47
+ "46": "冲{chong1}",
48
+ "47": "冲{chong4}",
49
+ "48": "几{ji1}",
50
+ "49": "几{ji2}",
51
+ "50": "几{ji3}",
52
+ "51": "分{fen1}",
53
+ "52": "分{fen4}",
54
+ "53": "分{fen5}",
55
+ "54": "切{qie1}",
56
+ "55": "切{qie4}",
57
+ "56": "划{hua2}",
58
+ "57": "划{hua4}",
59
+ "58": "划{hua5}",
60
+ "59": "创{chuang1}",
61
+ "60": "创{chuang4}",
62
+ "61": "剥{bao1}",
63
+ "62": "剥{bo1}",
64
+ "63": "勒{le4}",
65
+ "64": "勒{le5}",
66
+ "65": "勒{lei1}",
67
+ "66": "区{ou1}",
68
+ "67": "区{qu1}",
69
+ "68": "华{hua2}",
70
+ "69": "华{hua4}",
71
+ "70": "单{chan2}",
72
+ "71": "单{dan1}",
73
+ "72": "单{shan4}",
74
+ "73": "卜{bo5}",
75
+ "74": "卜{bu3}",
76
+ "75": "占{zhan1}",
77
+ "76": "占{zhan4}",
78
+ "77": "卡{ka2}",
79
+ "78": "卡{ka3}",
80
+ "79": "卡{qia3}",
81
+ "80": "卷{jvan3}",
82
+ "81": "卷{jvan4}",
83
+ "82": "厦{sha4}",
84
+ "83": "厦{xia4}",
85
+ "84": "参{can1}",
86
+ "85": "参{cen1}",
87
+ "86": "参{shen1}",
88
+ "87": "发{fa1}",
89
+ "88": "发{fa4}",
90
+ "89": "发{fa5}",
91
+ "90": "只{zhi1}",
92
+ "91": "只{zhi3}",
93
+ "92": "号{hao2}",
94
+ "93": "号{hao4}",
95
+ "94": "号{hao5}",
96
+ "95": "同{tong2}",
97
+ "96": "同{tong4}",
98
+ "97": "同{tong5}",
99
+ "98": "吐{tu2}",
100
+ "99": "吐{tu3}",
101
+ "100": "吐{tu4}",
102
+ "101": "和{he2}",
103
+ "102": "和{he4}",
104
+ "103": "和{he5}",
105
+ "104": "和{huo2}",
106
+ "105": "和{huo4}",
107
+ "106": "和{huo5}",
108
+ "107": "喝{he1}",
109
+ "108": "喝{he4}",
110
+ "109": "圈{jvan4}",
111
+ "110": "圈{qvan1}",
112
+ "111": "圈{qvan5}",
113
+ "112": "地{de5}",
114
+ "113": "地{di4}",
115
+ "114": "地{di5}",
116
+ "115": "塞{sai1}",
117
+ "116": "塞{sai2}",
118
+ "117": "塞{sai4}",
119
+ "118": "塞{se4}",
120
+ "119": "壳{ke2}",
121
+ "120": "壳{qiao4}",
122
+ "121": "处{chu3}",
123
+ "122": "处{chu4}",
124
+ "123": "奇{ji1}",
125
+ "124": "奇{qi2}",
126
+ "125": "奔{ben1}",
127
+ "126": "奔{ben4}",
128
+ "127": "好{hao3}",
129
+ "128": "好{hao4}",
130
+ "129": "好{hao5}",
131
+ "130": "宁{ning2}",
132
+ "131": "宁{ning4}",
133
+ "132": "宁{ning5}",
134
+ "133": "宿{su4}",
135
+ "134": "宿{xiu3}",
136
+ "135": "宿{xiu4}",
137
+ "136": "将{jiang1}",
138
+ "137": "将{jiang4}",
139
+ "138": "少{shao3}",
140
+ "139": "少{shao4}",
141
+ "140": "尽{jin3}",
142
+ "141": "尽{jin4}",
143
+ "142": "岗{gang1}",
144
+ "143": "岗{gang3}",
145
+ "144": "差{cha1}",
146
+ "145": "差{cha4}",
147
+ "146": "差{chai1}",
148
+ "147": "差{ci1}",
149
+ "148": "巷{hang4}",
150
+ "149": "巷{xiang4}",
151
+ "150": "帖{tie1}",
152
+ "151": "帖{tie3}",
153
+ "152": "帖{tie4}",
154
+ "153": "干{gan1}",
155
+ "154": "干{gan4}",
156
+ "155": "应{ying1}",
157
+ "156": "应{ying4}",
158
+ "157": "应{ying5}",
159
+ "158": "度{du4}",
160
+ "159": "度{du5}",
161
+ "160": "度{duo2}",
162
+ "161": "弹{dan4}",
163
+ "162": "弹{tan2}",
164
+ "163": "弹{tan5}",
165
+ "164": "强{jiang4}",
166
+ "165": "强{qiang2}",
167
+ "166": "强{qiang3}",
168
+ "167": "当{dang1}",
169
+ "168": "当{dang4}",
170
+ "169": "当{dang5}",
171
+ "170": "待{dai1}",
172
+ "171": "待{dai4}",
173
+ "172": "得{de2}",
174
+ "173": "得{de5}",
175
+ "174": "得{dei3}",
176
+ "175": "得{dei5}",
177
+ "176": "恶{e3}",
178
+ "177": "恶{e4}",
179
+ "178": "恶{wu4}",
180
+ "179": "扁{bian3}",
181
+ "180": "扁{pian1}",
182
+ "181": "扇{shan1}",
183
+ "182": "扇{shan4}",
184
+ "183": "扎{za1}",
185
+ "184": "扎{zha1}",
186
+ "185": "扎{zha2}",
187
+ "186": "扫{sao3}",
188
+ "187": "扫{sao4}",
189
+ "188": "担{dan1}",
190
+ "189": "担{dan4}",
191
+ "190": "担{dan5}",
192
+ "191": "挑{tiao1}",
193
+ "192": "挑{tiao3}",
194
+ "193": "据{jv1}",
195
+ "194": "据{jv4}",
196
+ "195": "撒{sa1}",
197
+ "196": "撒{sa3}",
198
+ "197": "撒{sa5}",
199
+ "198": "教{jiao1}",
200
+ "199": "教{jiao4}",
201
+ "200": "散{san3}",
202
+ "201": "散{san4}",
203
+ "202": "散{san5}",
204
+ "203": "数{shu3}",
205
+ "204": "数{shu4}",
206
+ "205": "数{shu5}",
207
+ "206": "斗{dou3}",
208
+ "207": "斗{dou4}",
209
+ "208": "晃{huang3}",
210
+ "209": "曝{bao4}",
211
+ "210": "曲{qu1}",
212
+ "211": "曲{qu3}",
213
+ "212": "更{geng1}",
214
+ "213": "更{geng4}",
215
+ "214": "曾{ceng1}",
216
+ "215": "曾{ceng2}",
217
+ "216": "曾{zeng1}",
218
+ "217": "朝{chao2}",
219
+ "218": "朝{zhao1}",
220
+ "219": "朴{piao2}",
221
+ "220": "朴{pu2}",
222
+ "221": "朴{pu3}",
223
+ "222": "杆{gan1}",
224
+ "223": "杆{gan3}",
225
+ "224": "查{cha2}",
226
+ "225": "查{zha1}",
227
+ "226": "校{jiao4}",
228
+ "227": "校{xiao4}",
229
+ "228": "模{mo2}",
230
+ "229": "模{mu2}",
231
+ "230": "横{heng2}",
232
+ "231": "横{heng4}",
233
+ "232": "没{mei2}",
234
+ "233": "没{mo4}",
235
+ "234": "泡{pao1}",
236
+ "235": "泡{pao4}",
237
+ "236": "泡{pao5}",
238
+ "237": "济{ji3}",
239
+ "238": "济{ji4}",
240
+ "239": "混{hun2}",
241
+ "240": "混{hun3}",
242
+ "241": "混{hun4}",
243
+ "242": "混{hun5}",
244
+ "243": "漂{piao1}",
245
+ "244": "漂{piao3}",
246
+ "245": "漂{piao4}",
247
+ "246": "炸{zha2}",
248
+ "247": "炸{zha4}",
249
+ "248": "熟{shou2}",
250
+ "249": "熟{shu2}",
251
+ "250": "燕{yan1}",
252
+ "251": "燕{yan4}",
253
+ "252": "片{pian1}",
254
+ "253": "片{pian4}",
255
+ "254": "率{lv4}",
256
+ "255": "率{shuai4}",
257
+ "256": "畜{chu4}",
258
+ "257": "畜{xu4}",
259
+ "258": "的{de5}",
260
+ "259": "的{di1}",
261
+ "260": "的{di2}",
262
+ "261": "的{di4}",
263
+ "262": "的{di5}",
264
+ "263": "盛{cheng2}",
265
+ "264": "盛{sheng4}",
266
+ "265": "相{xiang1}",
267
+ "266": "相{xiang4}",
268
+ "267": "相{xiang5}",
269
+ "268": "省{sheng3}",
270
+ "269": "省{xing3}",
271
+ "270": "看{kan1}",
272
+ "271": "看{kan4}",
273
+ "272": "看{kan5}",
274
+ "273": "着{zhao1}",
275
+ "274": "着{zhao2}",
276
+ "275": "着{zhao5}",
277
+ "276": "着{zhe5}",
278
+ "277": "着{zhuo2}",
279
+ "278": "着{zhuo5}",
280
+ "279": "矫{jiao3}",
281
+ "280": "禁{jin1}",
282
+ "281": "禁{jin4}",
283
+ "282": "种{zhong3}",
284
+ "283": "种{zhong4}",
285
+ "284": "称{chen4}",
286
+ "285": "称{cheng1}",
287
+ "286": "空{kong1}",
288
+ "287": "空{kong4}",
289
+ "288": "答{da1}",
290
+ "289": "答{da2}",
291
+ "290": "粘{nian2}",
292
+ "291": "粘{zhan1}",
293
+ "292": "糊{hu2}",
294
+ "293": "糊{hu5}",
295
+ "294": "系{ji4}",
296
+ "295": "系{xi4}",
297
+ "296": "系{xi5}",
298
+ "297": "累{lei2}",
299
+ "298": "累{lei3}",
300
+ "299": "累{lei4}",
301
+ "300": "累{lei5}",
302
+ "301": "纤{qian4}",
303
+ "302": "纤{xian1}",
304
+ "303": "结{jie1}",
305
+ "304": "结{jie2}",
306
+ "305": "结{jie5}",
307
+ "306": "给{gei3}",
308
+ "307": "给{gei5}",
309
+ "308": "给{ji3}",
310
+ "309": "缝{feng2}",
311
+ "310": "缝{feng4}",
312
+ "311": "缝{feng5}",
313
+ "312": "肖{xiao1}",
314
+ "313": "肖{xiao4}",
315
+ "314": "背{bei1}",
316
+ "315": "背{bei4}",
317
+ "316": "脏{zang1}",
318
+ "317": "脏{zang4}",
319
+ "318": "舍{she3}",
320
+ "319": "舍{she4}",
321
+ "320": "色{se4}",
322
+ "321": "色{shai3}",
323
+ "322": "落{lao4}",
324
+ "323": "落{luo4}",
325
+ "324": "蒙{meng1}",
326
+ "325": "蒙{meng2}",
327
+ "326": "蒙{meng3}",
328
+ "327": "薄{bao2}",
329
+ "328": "薄{bo2}",
330
+ "329": "薄{bo4}",
331
+ "330": "藏{cang2}",
332
+ "331": "藏{zang4}",
333
+ "332": "血{xie3}",
334
+ "333": "血{xue4}",
335
+ "334": "行{hang2}",
336
+ "335": "行{hang5}",
337
+ "336": "行{heng5}",
338
+ "337": "行{xing2}",
339
+ "338": "行{xing4}",
340
+ "339": "要{yao1}",
341
+ "340": "要{yao4}",
342
+ "341": "观{guan1}",
343
+ "342": "观{guan4}",
344
+ "343": "觉{jiao4}",
345
+ "344": "觉{jiao5}",
346
+ "345": "觉{jve2}",
347
+ "346": "角{jiao3}",
348
+ "347": "角{jve2}",
349
+ "348": "解{jie3}",
350
+ "349": "解{jie4}",
351
+ "350": "解{xie4}",
352
+ "351": "说{shui4}",
353
+ "352": "说{shuo1}",
354
+ "353": "调{diao4}",
355
+ "354": "调{tiao2}",
356
+ "355": "踏{ta1}",
357
+ "356": "踏{ta4}",
358
+ "357": "车{che1}",
359
+ "358": "车{jv1}",
360
+ "359": "转{zhuan3}",
361
+ "360": "转{zhuan4}",
362
+ "361": "载{zai3}",
363
+ "362": "载{zai4}",
364
+ "363": "还{hai2}",
365
+ "364": "还{huan2}",
366
+ "365": "遂{sui2}",
367
+ "366": "遂{sui4}",
368
+ "367": "都{dou1}",
369
+ "368": "都{du1}",
370
+ "369": "重{chong2}",
371
+ "370": "重{zhong4}",
372
+ "371": "量{liang2}",
373
+ "372": "量{liang4}",
374
+ "373": "量{liang5}",
375
+ "374": "钻{zuan1}",
376
+ "375": "钻{zuan4}",
377
+ "376": "铺{pu1}",
378
+ "377": "铺{pu4}",
379
+ "378": "长{chang2}",
380
+ "379": "长{chang3}",
381
+ "380": "长{zhang3}",
382
+ "381": "间{jian1}",
383
+ "382": "间{jian4}",
384
+ "383": "降{jiang4}",
385
+ "384": "降{xiang2}",
386
+ "385": "难{nan2}",
387
+ "386": "难{nan4}",
388
+ "387": "难{nan5}",
389
+ "388": "露{lou4}",
390
+ "389": "露{lu4}",
391
+ "390": "鲜{xian1}",
392
+ "391": "鲜{xian3}"
393
+ }
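polydict.json maps a class index to a "字{pinyin}" string, i.e. one specific pronunciation of a polyphonic character, with tone 5 marking the neutral tone and v standing in for ü (jv, qvan, yve, ...). polydict_r.json, added next, is simply the inverse table used when encoding targets. A minimal decoding sketch follows; exactly how the classifier's LABEL_k indices line up with these keys (the keys here start at "1" while the config's labels start at LABEL_0) is an assumption to confirm against chinese_model_g2p.py, not something the files themselves state.

```python
# Minimal sketch: turn a polyphone-classifier prediction into a pinyin syllable.
# Assumption: label index k corresponds to key str(k) in polydict.json; the
# off-by-one between LABEL_0 and key "1" must be checked in chinese_model_g2p.py.
import json
import re

MODEL_DIR = "diffrhythm/g2p/sources/g2p_chinese_model"

with open(f"{MODEL_DIR}/polydict.json", encoding="utf-8") as f:
    polydict = json.load(f)  # {"1": "丧{sang1}", "2": "丧{sang4}", ...}

def decode_label(label_id: int) -> tuple[str, str]:
    """Split an entry like '中{zhong4}' into (character, pinyin)."""
    entry = polydict[str(label_id)]
    char, pinyin = re.match(r"(.+)\{(.+)\}", entry).groups()
    return char, pinyin

print(decode_label(4))  # ('中', 'zhong4') under the assumed indexing
```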
diffrhythm/g2p/sources/g2p_chinese_model/polydict_r.json ADDED
@@ -0,0 +1,393 @@
1
+ {
2
+ "丧{sang1}": 1,
3
+ "丧{sang4}": 2,
4
+ "中{zhong1}": 3,
5
+ "中{zhong4}": 4,
6
+ "为{wei2}": 5,
7
+ "为{wei4}": 6,
8
+ "乌{wu1}": 7,
9
+ "乌{wu4}": 8,
10
+ "乐{lao4}": 9,
11
+ "乐{le4}": 10,
12
+ "乐{le5}": 11,
13
+ "乐{yao4}": 12,
14
+ "乐{yve4}": 13,
15
+ "了{le5}": 14,
16
+ "了{liao3}": 15,
17
+ "了{liao5}": 16,
18
+ "什{shen2}": 17,
19
+ "什{shi2}": 18,
20
+ "仔{zai3}": 19,
21
+ "仔{zai5}": 20,
22
+ "仔{zi3}": 21,
23
+ "仔{zi5}": 22,
24
+ "令{ling2}": 23,
25
+ "令{ling4}": 24,
26
+ "任{ren2}": 25,
27
+ "任{ren4}": 26,
28
+ "会{hui4}": 27,
29
+ "会{hui5}": 28,
30
+ "会{kuai4}": 29,
31
+ "传{chuan2}": 30,
32
+ "传{zhuan4}": 31,
33
+ "佛{fo2}": 32,
34
+ "佛{fu2}": 33,
35
+ "供{gong1}": 34,
36
+ "供{gong4}": 35,
37
+ "便{bian4}": 36,
38
+ "便{pian2}": 37,
39
+ "倒{dao3}": 38,
40
+ "倒{dao4}": 39,
41
+ "假{jia3}": 40,
42
+ "假{jia4}": 41,
43
+ "兴{xing1}": 42,
44
+ "兴{xing4}": 43,
45
+ "冠{guan1}": 44,
46
+ "冠{guan4}": 45,
47
+ "冲{chong1}": 46,
48
+ "冲{chong4}": 47,
49
+ "几{ji1}": 48,
50
+ "几{ji2}": 49,
51
+ "几{ji3}": 50,
52
+ "分{fen1}": 51,
53
+ "分{fen4}": 52,
54
+ "分{fen5}": 53,
55
+ "切{qie1}": 54,
56
+ "切{qie4}": 55,
57
+ "划{hua2}": 56,
58
+ "划{hua4}": 57,
59
+ "划{hua5}": 58,
60
+ "创{chuang1}": 59,
61
+ "创{chuang4}": 60,
62
+ "剥{bao1}": 61,
63
+ "剥{bo1}": 62,
64
+ "勒{le4}": 63,
65
+ "勒{le5}": 64,
66
+ "勒{lei1}": 65,
67
+ "区{ou1}": 66,
68
+ "区{qu1}": 67,
69
+ "华{hua2}": 68,
70
+ "华{hua4}": 69,
71
+ "单{chan2}": 70,
72
+ "单{dan1}": 71,
73
+ "单{shan4}": 72,
74
+ "卜{bo5}": 73,
75
+ "卜{bu3}": 74,
76
+ "占{zhan1}": 75,
77
+ "占{zhan4}": 76,
78
+ "卡{ka2}": 77,
79
+ "卡{ka3}": 78,
80
+ "卡{qia3}": 79,
81
+ "卷{jvan3}": 80,
82
+ "卷{jvan4}": 81,
83
+ "厦{sha4}": 82,
84
+ "厦{xia4}": 83,
85
+ "参{can1}": 84,
86
+ "参{cen1}": 85,
87
+ "参{shen1}": 86,
88
+ "发{fa1}": 87,
89
+ "发{fa4}": 88,
90
+ "发{fa5}": 89,
91
+ "只{zhi1}": 90,
92
+ "只{zhi3}": 91,
93
+ "号{hao2}": 92,
94
+ "号{hao4}": 93,
95
+ "号{hao5}": 94,
96
+ "同{tong2}": 95,
97
+ "同{tong4}": 96,
98
+ "同{tong5}": 97,
99
+ "吐{tu2}": 98,
100
+ "吐{tu3}": 99,
101
+ "吐{tu4}": 100,
102
+ "和{he2}": 101,
103
+ "和{he4}": 102,
104
+ "和{he5}": 103,
105
+ "和{huo2}": 104,
106
+ "和{huo4}": 105,
107
+ "和{huo5}": 106,
108
+ "喝{he1}": 107,
109
+ "喝{he4}": 108,
110
+ "圈{jvan4}": 109,
111
+ "圈{qvan1}": 110,
112
+ "圈{qvan5}": 111,
113
+ "地{de5}": 112,
114
+ "地{di4}": 113,
115
+ "地{di5}": 114,
116
+ "塞{sai1}": 115,
117
+ "塞{sai2}": 116,
118
+ "塞{sai4}": 117,
119
+ "塞{se4}": 118,
120
+ "壳{ke2}": 119,
121
+ "壳{qiao4}": 120,
122
+ "处{chu3}": 121,
123
+ "处{chu4}": 122,
124
+ "奇{ji1}": 123,
125
+ "奇{qi2}": 124,
126
+ "奔{ben1}": 125,
127
+ "奔{ben4}": 126,
128
+ "好{hao3}": 127,
129
+ "好{hao4}": 128,
130
+ "好{hao5}": 129,
131
+ "宁{ning2}": 130,
132
+ "宁{ning4}": 131,
133
+ "宁{ning5}": 132,
134
+ "宿{su4}": 133,
135
+ "宿{xiu3}": 134,
136
+ "宿{xiu4}": 135,
137
+ "将{jiang1}": 136,
138
+ "将{jiang4}": 137,
139
+ "少{shao3}": 138,
140
+ "少{shao4}": 139,
141
+ "尽{jin3}": 140,
142
+ "尽{jin4}": 141,
143
+ "岗{gang1}": 142,
144
+ "岗{gang3}": 143,
145
+ "差{cha1}": 144,
146
+ "差{cha4}": 145,
147
+ "差{chai1}": 146,
148
+ "差{ci1}": 147,
149
+ "巷{hang4}": 148,
150
+ "巷{xiang4}": 149,
151
+ "帖{tie1}": 150,
152
+ "帖{tie3}": 151,
153
+ "帖{tie4}": 152,
154
+ "干{gan1}": 153,
155
+ "干{gan4}": 154,
156
+ "应{ying1}": 155,
157
+ "应{ying4}": 156,
158
+ "应{ying5}": 157,
159
+ "度{du4}": 158,
160
+ "度{du5}": 159,
161
+ "度{duo2}": 160,
162
+ "弹{dan4}": 161,
163
+ "弹{tan2}": 162,
164
+ "弹{tan5}": 163,
165
+ "强{jiang4}": 164,
166
+ "强{qiang2}": 165,
167
+ "强{qiang3}": 166,
168
+ "当{dang1}": 167,
169
+ "当{dang4}": 168,
170
+ "当{dang5}": 169,
171
+ "待{dai1}": 170,
172
+ "待{dai4}": 171,
173
+ "得{de2}": 172,
174
+ "得{de5}": 173,
175
+ "得{dei3}": 174,
176
+ "得{dei5}": 175,
177
+ "恶{e3}": 176,
178
+ "恶{e4}": 177,
179
+ "恶{wu4}": 178,
180
+ "扁{bian3}": 179,
181
+ "扁{pian1}": 180,
182
+ "扇{shan1}": 181,
183
+ "扇{shan4}": 182,
184
+ "扎{za1}": 183,
185
+ "扎{zha1}": 184,
186
+ "扎{zha2}": 185,
187
+ "扫{sao3}": 186,
188
+ "扫{sao4}": 187,
189
+ "担{dan1}": 188,
190
+ "担{dan4}": 189,
191
+ "担{dan5}": 190,
192
+ "挑{tiao1}": 191,
193
+ "挑{tiao3}": 192,
194
+ "据{jv1}": 193,
195
+ "据{jv4}": 194,
196
+ "撒{sa1}": 195,
197
+ "撒{sa3}": 196,
198
+ "撒{sa5}": 197,
199
+ "教{jiao1}": 198,
200
+ "教{jiao4}": 199,
201
+ "散{san3}": 200,
202
+ "散{san4}": 201,
203
+ "散{san5}": 202,
204
+ "数{shu3}": 203,
205
+ "数{shu4}": 204,
206
+ "数{shu5}": 205,
207
+ "斗{dou3}": 206,
208
+ "斗{dou4}": 207,
209
+ "晃{huang3}": 208,
210
+ "曝{bao4}": 209,
211
+ "曲{qu1}": 210,
212
+ "曲{qu3}": 211,
213
+ "更{geng1}": 212,
214
+ "更{geng4}": 213,
215
+ "曾{ceng1}": 214,
216
+ "曾{ceng2}": 215,
217
+ "曾{zeng1}": 216,
218
+ "朝{chao2}": 217,
219
+ "朝{zhao1}": 218,
220
+ "朴{piao2}": 219,
221
+ "朴{pu2}": 220,
222
+ "朴{pu3}": 221,
223
+ "杆{gan1}": 222,
224
+ "杆{gan3}": 223,
225
+ "查{cha2}": 224,
226
+ "查{zha1}": 225,
227
+ "校{jiao4}": 226,
228
+ "校{xiao4}": 227,
229
+ "模{mo2}": 228,
230
+ "模{mu2}": 229,
231
+ "横{heng2}": 230,
232
+ "横{heng4}": 231,
233
+ "没{mei2}": 232,
234
+ "没{mo4}": 233,
235
+ "泡{pao1}": 234,
236
+ "泡{pao4}": 235,
237
+ "泡{pao5}": 236,
238
+ "济{ji3}": 237,
239
+ "济{ji4}": 238,
240
+ "混{hun2}": 239,
241
+ "混{hun3}": 240,
242
+ "混{hun4}": 241,
243
+ "混{hun5}": 242,
244
+ "漂{piao1}": 243,
245
+ "漂{piao3}": 244,
246
+ "漂{piao4}": 245,
247
+ "炸{zha2}": 246,
248
+ "炸{zha4}": 247,
249
+ "熟{shou2}": 248,
250
+ "熟{shu2}": 249,
251
+ "燕{yan1}": 250,
252
+ "燕{yan4}": 251,
253
+ "片{pian1}": 252,
254
+ "片{pian4}": 253,
255
+ "率{lv4}": 254,
256
+ "率{shuai4}": 255,
257
+ "畜{chu4}": 256,
258
+ "畜{xu4}": 257,
259
+ "的{de5}": 258,
260
+ "的{di1}": 259,
261
+ "的{di2}": 260,
262
+ "的{di4}": 261,
263
+ "的{di5}": 262,
264
+ "盛{cheng2}": 263,
265
+ "盛{sheng4}": 264,
266
+ "相{xiang1}": 265,
267
+ "相{xiang4}": 266,
268
+ "相{xiang5}": 267,
269
+ "省{sheng3}": 268,
270
+ "省{xing3}": 269,
271
+ "看{kan1}": 270,
272
+ "看{kan4}": 271,
273
+ "看{kan5}": 272,
274
+ "着{zhao1}": 273,
275
+ "着{zhao2}": 274,
276
+ "着{zhao5}": 275,
277
+ "着{zhe5}": 276,
278
+ "着{zhuo2}": 277,
279
+ "着{zhuo5}": 278,
280
+ "矫{jiao3}": 279,
281
+ "禁{jin1}": 280,
282
+ "禁{jin4}": 281,
283
+ "种{zhong3}": 282,
284
+ "种{zhong4}": 283,
285
+ "称{chen4}": 284,
286
+ "称{cheng1}": 285,
287
+ "空{kong1}": 286,
288
+ "空{kong4}": 287,
289
+ "答{da1}": 288,
290
+ "答{da2}": 289,
291
+ "粘{nian2}": 290,
292
+ "粘{zhan1}": 291,
293
+ "糊{hu2}": 292,
294
+ "糊{hu5}": 293,
295
+ "系{ji4}": 294,
296
+ "系{xi4}": 295,
297
+ "系{xi5}": 296,
298
+ "累{lei2}": 297,
299
+ "累{lei3}": 298,
300
+ "累{lei4}": 299,
301
+ "累{lei5}": 300,
302
+ "纤{qian4}": 301,
303
+ "纤{xian1}": 302,
304
+ "结{jie1}": 303,
305
+ "结{jie2}": 304,
306
+ "结{jie5}": 305,
307
+ "给{gei3}": 306,
308
+ "给{gei5}": 307,
309
+ "给{ji3}": 308,
310
+ "缝{feng2}": 309,
311
+ "缝{feng4}": 310,
312
+ "缝{feng5}": 311,
313
+ "肖{xiao1}": 312,
314
+ "肖{xiao4}": 313,
315
+ "背{bei1}": 314,
316
+ "背{bei4}": 315,
317
+ "脏{zang1}": 316,
318
+ "脏{zang4}": 317,
319
+ "舍{she3}": 318,
320
+ "舍{she4}": 319,
321
+ "色{se4}": 320,
322
+ "色{shai3}": 321,
323
+ "落{lao4}": 322,
324
+ "落{luo4}": 323,
325
+ "蒙{meng1}": 324,
326
+ "蒙{meng2}": 325,
327
+ "蒙{meng3}": 326,
328
+ "薄{bao2}": 327,
329
+ "薄{bo2}": 328,
330
+ "薄{bo4}": 329,
331
+ "藏{cang2}": 330,
332
+ "藏{zang4}": 331,
333
+ "血{xie3}": 332,
334
+ "血{xue4}": 333,
335
+ "行{hang2}": 334,
336
+ "行{hang5}": 335,
337
+ "行{heng5}": 336,
338
+ "行{xing2}": 337,
339
+ "行{xing4}": 338,
340
+ "要{yao1}": 339,
341
+ "要{yao4}": 340,
342
+ "观{guan1}": 341,
343
+ "观{guan4}": 342,
344
+ "觉{jiao4}": 343,
345
+ "觉{jiao5}": 344,
346
+ "觉{jve2}": 345,
347
+ "角{jiao3}": 346,
348
+ "角{jve2}": 347,
349
+ "解{jie3}": 348,
350
+ "解{jie4}": 349,
351
+ "解{xie4}": 350,
352
+ "说{shui4}": 351,
353
+ "说{shuo1}": 352,
354
+ "调{diao4}": 353,
355
+ "调{tiao2}": 354,
356
+ "踏{ta1}": 355,
357
+ "踏{ta4}": 356,
358
+ "车{che1}": 357,
359
+ "车{jv1}": 358,
360
+ "转{zhuan3}": 359,
361
+ "转{zhuan4}": 360,
362
+ "载{zai3}": 361,
363
+ "载{zai4}": 362,
364
+ "还{hai2}": 363,
365
+ "还{huan2}": 364,
366
+ "遂{sui2}": 365,
367
+ "遂{sui4}": 366,
368
+ "都{dou1}": 367,
369
+ "都{du1}": 368,
370
+ "重{chong2}": 369,
371
+ "重{zhong4}": 370,
372
+ "量{liang2}": 371,
373
+ "量{liang4}": 372,
374
+ "量{liang5}": 373,
375
+ "钻{zuan1}": 374,
376
+ "钻{zuan4}": 375,
377
+ "铺{pu1}": 376,
378
+ "铺{pu4}": 377,
379
+ "长{chang2}": 378,
380
+ "长{chang3}": 379,
381
+ "长{zhang3}": 380,
382
+ "间{jian1}": 381,
383
+ "间{jian4}": 382,
384
+ "降{jiang4}": 383,
385
+ "降{xiang2}": 384,
386
+ "难{nan2}": 385,
387
+ "难{nan4}": 386,
388
+ "难{nan5}": 387,
389
+ "露{lou4}": 388,
390
+ "露{lu4}": 389,
391
+ "鲜{xian1}": 390,
392
+ "鲜{xian3}": 391
393
+ }
diffrhythm/g2p/sources/g2p_chinese_model/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
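vocab.txt is the WordPiece vocabulary for the BERT encoder configured above (config.json declares vocab_size 21128, consistent with the standard Chinese BERT vocabulary); it is too large to render in this view. If needed it can be read directly, as in the rough sketch below (a plain line-per-token reading for inspection, not the project's own tokenizer setup).

```python
# Rough sketch: read the WordPiece vocab as token -> id, one token per line.
MODEL_DIR = "diffrhythm/g2p/sources/g2p_chinese_model"

with open(f"{MODEL_DIR}/vocab.txt", encoding="utf-8") as f:
    vocab = {line.rstrip("\n"): idx for idx, line in enumerate(f)}

print(len(vocab))                               # expected to match config.json's vocab_size
print(vocab.get("[CLS]"), vocab.get("[MASK]"))  # special tokens, if present
```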
 
diffrhythm/g2p/sources/pinyin_2_bpmf.txt ADDED
@@ -0,0 +1,429 @@
1
+ a ㄚ
2
+ ai ㄞ
3
+ an ㄢ
4
+ ang ㄤ
5
+ ao ㄠ
6
+ ba ㄅㄚ
7
+ bai ㄅㄞ
8
+ ban ㄅㄢ
9
+ bang ㄅㄤ
10
+ bao ㄅㄠ
11
+ bei ㄅㄟ
12
+ ben ㄅㄣ
13
+ beng ㄅㄥ
14
+ bi ㄅㄧ
15
+ bian ㄅㄧㄢ
16
+ biang ㄅㄧㄤ
17
+ biao ㄅㄧㄠ
18
+ bie ㄅㄧㄝ
19
+ bin ㄅㄧㄣ
20
+ bing ㄅㄧㄥ
21
+ bo ㄅㄛ
22
+ bu ㄅㄨ
23
+ ca ㄘㄚ
24
+ cai ㄘㄞ
25
+ can ㄘㄢ
26
+ cang ㄘㄤ
27
+ cao ㄘㄠ
28
+ ce ㄘㄜ
29
+ cen ㄘㄣ
30
+ ceng ㄘㄥ
31
+ cha ㄔㄚ
32
+ chai ㄔㄞ
33
+ chan ㄔㄢ
34
+ chang ㄔㄤ
35
+ chao ㄔㄠ
36
+ che ㄔㄜ
37
+ chen ㄔㄣ
38
+ cheng ㄔㄥ
39
+ chi ㄔ
40
+ chong ㄔㄨㄥ
41
+ chou ㄔㄡ
42
+ chu ㄔㄨ
43
+ chua ㄔㄨㄚ
44
+ chuai ㄔㄨㄞ
45
+ chuan ㄔㄨㄢ
46
+ chuang ㄔㄨㄤ
47
+ chui ㄔㄨㄟ
48
+ chun ㄔㄨㄣ
49
+ chuo ㄔㄨㄛ
50
+ ci ㄘ
51
+ cong ㄘㄨㄥ
52
+ cou ㄘㄡ
53
+ cu ㄘㄨ
54
+ cuan ㄘㄨㄢ
55
+ cui ㄘㄨㄟ
56
+ cun ㄘㄨㄣ
57
+ cuo ㄘㄨㄛ
58
+ da ㄉㄚ
59
+ dai ㄉㄞ
60
+ dan ㄉㄢ
61
+ dang ㄉㄤ
62
+ dao ㄉㄠ
63
+ de ㄉㄜ
64
+ dei ㄉㄟ
65
+ den ㄉㄣ
66
+ deng ㄉㄥ
67
+ di ㄉㄧ
68
+ dia ㄉㄧㄚ
69
+ dian ㄉㄧㄢ
70
+ diao ㄉㄧㄠ
71
+ die ㄉㄧㄝ
72
+ din ㄉㄧㄣ
73
+ ding ㄉㄧㄥ
74
+ diu ㄉㄧㄡ
75
+ dong ㄉㄨㄥ
76
+ dou ㄉㄡ
77
+ du ㄉㄨ
78
+ duan ㄉㄨㄢ
79
+ dui ㄉㄨㄟ
80
+ dun ㄉㄨㄣ
81
+ duo ㄉㄨㄛ
82
+ e ㄜ
83
+ ei ㄟ
84
+ en ㄣ
85
+ eng ㄥ
86
+ er ㄦ
87
+ fa ㄈㄚ
88
+ fan ㄈㄢ
89
+ fang ㄈㄤ
90
+ fei ㄈㄟ
91
+ fen ㄈㄣ
92
+ feng ㄈㄥ
93
+ fo ㄈㄛ
94
+ fou ㄈㄡ
95
+ fu ㄈㄨ
96
+ ga ㄍㄚ
97
+ gai ㄍㄞ
98
+ gan ㄍㄢ
99
+ gang ㄍㄤ
100
+ gao ㄍㄠ
101
+ ge ㄍㄜ
102
+ gei ㄍㄟ
103
+ gen ㄍㄣ
104
+ geng ㄍㄥ
105
+ gong ㄍㄨㄥ
106
+ gou ㄍㄡ
107
+ gu ㄍㄨ
108
+ gua ㄍㄨㄚ
109
+ guai ㄍㄨㄞ
110
+ guan ㄍㄨㄢ
111
+ guang ㄍㄨㄤ
112
+ gui ㄍㄨㄟ
113
+ gun ㄍㄨㄣ
114
+ guo ㄍㄨㄛ
115
+ ha ㄏㄚ
116
+ hai ㄏㄞ
117
+ han ㄏㄢ
118
+ hang ㄏㄤ
119
+ hao ㄏㄠ
120
+ he ㄏㄜ
121
+ hei ㄏㄟ
122
+ hen ㄏㄣ
123
+ heng ㄏㄥ
124
+ hm ㄏㄇ
125
+ hong ㄏㄨㄥ
126
+ hou ㄏㄡ
127
+ hu ㄏㄨ
128
+ hua ㄏㄨㄚ
129
+ huai ㄏㄨㄞ
130
+ huan ㄏㄨㄢ
131
+ huang ㄏㄨㄤ
132
+ hui ㄏㄨㄟ
133
+ hun ㄏㄨㄣ
134
+ huo ㄏㄨㄛ
135
+ ji ㄐㄧ
136
+ jia ㄐㄧㄚ
137
+ jian ㄐㄧㄢ
138
+ jiang ㄐㄧㄤ
139
+ jiao ㄐㄧㄠ
140
+ jie ㄐㄧㄝ
141
+ jin ㄐㄧㄣ
142
+ jing ㄐㄧㄥ
143
+ jiong ㄐㄩㄥ
144
+ jiu ㄐㄧㄡ
145
+ ju ㄐㄩ
146
+ jv ㄐㄩ
147
+ juan ㄐㄩㄢ
148
+ jvan ㄐㄩㄢ
149
+ jue ㄐㄩㄝ
150
+ jve ㄐㄩㄝ
151
+ jun ㄐㄩㄣ
152
+ ka ㄎㄚ
153
+ kai ㄎㄞ
154
+ kan ㄎㄢ
155
+ kang ㄎㄤ
156
+ kao ㄎㄠ
157
+ ke ㄎㄜ
158
+ kei ㄎㄟ
159
+ ken ㄎㄣ
160
+ keng ㄎㄥ
161
+ kong ㄎㄨㄥ
162
+ kou ㄎㄡ
163
+ ku ㄎㄨ
164
+ kua ㄎㄨㄚ
165
+ kuai ㄎㄨㄞ
166
+ kuan ㄎㄨㄢ
167
+ kuang ㄎㄨㄤ
168
+ kui ㄎㄨㄟ
169
+ kun ㄎㄨㄣ
170
+ kuo ㄎㄨㄛ
171
+ la ㄌㄚ
172
+ lai ㄌㄞ
173
+ lan ㄌㄢ
174
+ lang ㄌㄤ
175
+ lao ㄌㄠ
176
+ le ㄌㄜ
177
+ lei ㄌㄟ
178
+ leng ㄌㄥ
179
+ li ㄌㄧ
180
+ lia ㄌㄧㄚ
181
+ lian ㄌㄧㄢ
182
+ liang ㄌㄧㄤ
183
+ liao ㄌㄧㄠ
184
+ lie ㄌㄧㄝ
185
+ lin ㄌㄧㄣ
186
+ ling ㄌㄧㄥ
187
+ liu ㄌㄧㄡ
188
+ lo ㄌㄛ
189
+ long ㄌㄨㄥ
190
+ lou ㄌㄡ
191
+ lu ㄌㄨ
192
+ luan ㄌㄨㄢ
193
+ lue ㄌㄩㄝ
194
+ lun ㄌㄨㄣ
195
+ luo ㄌㄨㄛ
196
+ lv ㄌㄩ
197
+ lve ㄌㄩㄝ
198
+ m ㄇㄨ
199
+ ma ㄇㄚ
200
+ mai ㄇㄞ
201
+ man ㄇㄢ
202
+ mang ㄇㄤ
203
+ mao ㄇㄠ
204
+ me ㄇㄜ
205
+ mei ㄇㄟ
206
+ men ㄇㄣ
207
+ meng ㄇㄥ
208
+ mi ㄇㄧ
209
+ mian ㄇㄧㄢ
210
+ miao ㄇㄧㄠ
211
+ mie ㄇㄧㄝ
212
+ min ㄇㄧㄣ
213
+ ming ㄇㄧㄥ
214
+ miu ㄇㄧㄡ
215
+ mo ㄇㄛ
216
+ mou ㄇㄡ
217
+ mu ㄇㄨ
218
+ n ㄣ
219
+ na ㄋㄚ
220
+ nai ㄋㄞ
221
+ nan ㄋㄢ
222
+ nang ㄋㄤ
223
+ nao ㄋㄠ
224
+ ne ㄋㄜ
225
+ nei ㄋㄟ
226
+ nen ㄋㄣ
227
+ neng ㄋㄥ
228
+ ng ㄣ
229
+ ni ㄋㄧ
230
+ nian ㄋㄧㄢ
231
+ niang ㄋㄧㄤ
232
+ niao ㄋㄧㄠ
233
+ nie ㄋㄧㄝ
234
+ nin ㄋㄧㄣ
235
+ ning ㄋㄧㄥ
236
+ niu ㄋㄧㄡ
237
+ nong ㄋㄨㄥ
238
+ nou ㄋㄡ
239
+ nu ㄋㄨ
240
+ nuan ㄋㄨㄢ
241
+ nue ㄋㄩㄝ
242
+ nun ㄋㄨㄣ
243
+ nuo ㄋㄨㄛ
244
+ nv ㄋㄩ
245
+ nve ㄋㄩㄝ
246
+ o ㄛ
247
+ ou ㄡ
248
+ pa ㄆㄚ
249
+ pai ㄆㄞ
250
+ pan ㄆㄢ
251
+ pang ㄆㄤ
252
+ pao ㄆㄠ
253
+ pei ㄆㄟ
254
+ pen ㄆㄣ
255
+ peng ㄆㄥ
256
+ pi ㄆㄧ
257
+ pian ㄆㄧㄢ
258
+ piao ㄆㄧㄠ
259
+ pie ㄆㄧㄝ
260
+ pin ㄆㄧㄣ
261
+ ping ㄆㄧㄥ
262
+ po ㄆㄛ
263
+ pou ㄆㄡ
264
+ pu ㄆㄨ
265
+ qi ㄑㄧ
266
+ qia ㄑㄧㄚ
267
+ qian ㄑㄧㄢ
268
+ qiang ㄑㄧㄤ
269
+ qiao ㄑㄧㄠ
270
+ qie ㄑㄧㄝ
271
+ qin ㄑㄧㄣ
272
+ qing ㄑㄧㄥ
273
+ qiong ㄑㄩㄥ
274
+ qiu ㄑㄧㄡ
275
+ qu ㄑㄩ
276
+ quan ㄑㄩㄢ
277
+ qvan ㄑㄩㄢ
278
+ que ㄑㄩㄝ
279
+ qun ㄑㄩㄣ
280
+ ran ㄖㄢ
281
+ rang ㄖㄤ
282
+ rao ㄖㄠ
283
+ re ㄖㄜ
284
+ ren ㄖㄣ
285
+ reng ㄖㄥ
286
+ ri ㄖ
287
+ rong ㄖㄨㄥ
288
+ rou ㄖㄡ
289
+ ru ㄖㄨ
290
+ rua ㄖㄨㄚ
291
+ ruan ㄖㄨㄢ
292
+ rui ㄖㄨㄟ
293
+ run ㄖㄨㄣ
294
+ ruo ㄖㄨㄛ
295
+ sa ㄙㄚ
296
+ sai ㄙㄞ
297
+ san ㄙㄢ
298
+ sang ㄙㄤ
299
+ sao ㄙㄠ
300
+ se ㄙㄜ
301
+ sen ㄙㄣ
302
+ seng ㄙㄥ
303
+ sha ㄕㄚ
304
+ shai ㄕㄞ
305
+ shan ㄕㄢ
306
+ shang ㄕㄤ
307
+ shao ㄕㄠ
308
+ she ㄕㄜ
309
+ shei ㄕㄟ
310
+ shen ㄕㄣ
311
+ sheng ㄕㄥ
312
+ shi ㄕ
313
+ shou ㄕㄡ
314
+ shu ㄕㄨ
315
+ shua ㄕㄨㄚ
316
+ shuai ㄕㄨㄞ
317
+ shuan ㄕㄨㄢ
318
+ shuang ㄕㄨㄤ
319
+ shui ㄕㄨㄟ
320
+ shun ㄕㄨㄣ
321
+ shuo ㄕㄨㄛ
322
+ si ㄙ
323
+ song ㄙㄨㄥ
324
+ sou ㄙㄡ
325
+ su ㄙㄨ
326
+ suan ㄙㄨㄢ
327
+ sui ㄙㄨㄟ
328
+ sun ㄙㄨㄣ
329
+ suo ㄙㄨㄛ
330
+ ta ㄊㄚ
331
+ tai ㄊㄞ
332
+ tan ㄊㄢ
333
+ tang ㄊㄤ
334
+ tao ㄊㄠ
335
+ te ㄊㄜ
336
+ tei ㄊㄟ
337
+ teng ㄊㄥ
338
+ ti ㄊㄧ
339
+ tian ㄊㄧㄢ
340
+ tiao ㄊㄧㄠ
341
+ tie ㄊㄧㄝ
342
+ ting ㄊㄧㄥ
343
+ tong ㄊㄨㄥ
344
+ tou ㄊㄡ
345
+ tsuo ㄘㄨㄛ
346
+ tu ㄊㄨ
347
+ tuan ㄊㄨㄢ
348
+ tui ㄊㄨㄟ
349
+ tun ㄊㄨㄣ
350
+ tuo ㄊㄨㄛ
351
+ tzan ㄗㄢ
352
+ wa ㄨㄚ
353
+ wai ㄨㄞ
354
+ wan ㄨㄢ
355
+ wang ㄨㄤ
356
+ wei ㄨㄟ
357
+ wen ㄨㄣ
358
+ weng ㄨㄥ
359
+ wo ㄨㄛ
360
+ wong ㄨㄥ
361
+ wu ㄨ
362
+ xi ㄒㄧ
363
+ xia ㄒㄧㄚ
364
+ xian ㄒㄧㄢ
365
+ xiang ㄒㄧㄤ
366
+ xiao ㄒㄧㄠ
367
+ xie ㄒㄧㄝ
368
+ xin ㄒㄧㄣ
369
+ xing ㄒㄧㄥ
370
+ xiong ㄒㄩㄥ
371
+ xiu ㄒㄧㄡ
372
+ xu ㄒㄩ
373
+ xuan ㄒㄩㄢ
374
+ xue ㄒㄩㄝ
375
+ xun ㄒㄩㄣ
376
+ ya ㄧㄚ
377
+ yai ㄧㄞ
378
+ yan ㄧㄢ
379
+ yang ㄧㄤ
380
+ yao ㄧㄠ
381
+ ye ㄧㄝ
382
+ yi ㄧ
383
+ yin ㄧㄣ
384
+ ying ㄧㄥ
385
+ yo ㄧㄛ
386
+ yong ㄩㄥ
387
+ you ㄧㄡ
388
+ yu ㄩ
389
+ yuan ㄩㄢ
390
+ yue ㄩㄝ
391
+ yve ㄩㄝ
392
+ yun ㄩㄣ
393
+ za ㄗㄚ
394
+ zai ㄗㄞ
395
+ zan ㄗㄢ
396
+ zang ㄗㄤ
397
+ zao ㄗㄠ
398
+ ze ㄗㄜ
399
+ zei ㄗㄟ
400
+ zen ㄗㄣ
401
+ zeng ㄗㄥ
402
+ zha ㄓㄚ
403
+ zhai ㄓㄞ
404
+ zhan ㄓㄢ
405
+ zhang ㄓㄤ
406
+ zhao ㄓㄠ
407
+ zhe ㄓㄜ
408
+ zhei ㄓㄟ
409
+ zhen ㄓㄣ
410
+ zheng ㄓㄥ
411
+ zhi ㄓ
412
+ zhong ㄓㄨㄥ
413
+ zhou ㄓㄡ
414
+ zhu ㄓㄨ
415
+ zhua ㄓㄨㄚ
416
+ zhuai ㄓㄨㄞ
417
+ zhuan ㄓㄨㄢ
418
+ zhuang ㄓㄨㄤ
419
+ zhui ㄓㄨㄟ
420
+ zhun ㄓㄨㄣ
421
+ zhuo ㄓㄨㄛ
422
+ zi ㄗ
423
+ zong ㄗㄨㄥ
424
+ zou ㄗㄡ
425
+ zu ㄗㄨ
426
+ zuan ㄗㄨㄢ
427
+ zui ㄗㄨㄟ
428
+ zun ㄗㄨㄣ
429
+ zuo ㄗㄨㄛ
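pinyin_2_bpmf.txt maps toneless pinyin syllables to Bopomofo (zhuyin) spellings, one whitespace-separated pair per line, and includes the project's v-spellings (jv, qvan, yve, ...) as aliases. A minimal lookup sketch is below; stripping the trailing tone digit before the lookup and carrying the tone separately is an assumption about how the table is meant to be used rather than something the file states.

```python
# Minimal sketch: convert a toned pinyin syllable such as "zhong4" to Bopomofo.
# Assumption: the tone digit is stripped for the table lookup and handled separately.
SRC = "diffrhythm/g2p/sources/pinyin_2_bpmf.txt"

pinyin2bpmf = {}
with open(SRC, encoding="utf-8") as f:
    for line in f:
        pinyin, bpmf = line.split()
        pinyin2bpmf[pinyin] = bpmf

def to_bopomofo(syllable: str) -> str:
    base = syllable.rstrip("12345")  # "zhong4" -> "zhong"
    tone = syllable[len(base):]      # "4" (may be empty)
    # Appending the digit is a placeholder; proper zhuyin uses tone marks instead.
    return pinyin2bpmf[base] + tone

print(to_bopomofo("zhong4"))  # -> "ㄓㄨㄥ4"
```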
diffrhythm/g2p/utils/__pycache__/front_utils.cpython-310.pyc ADDED
Binary file (583 Bytes). View file
 
diffrhythm/g2p/utils/__pycache__/front_utils.cpython-311.pyc ADDED
Binary file (960 Bytes). View file
 
diffrhythm/g2p/utils/__pycache__/g2p.cpython-310.pyc ADDED
Binary file (2.78 kB). View file