|
|
|
|
|
|
|
|
|
|
|
import re |
|
import jieba |
|
import cn2an |
|
from pypinyin import lazy_pinyin, BOPOMOFO |
|
from typing import List |
|
from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict |
|
from diffrhythm.g2p.utils.front_utils import * |
|
import os |
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
|
|
|
|
|
|
# Controls the "_" separators emitted by chinese_to_bopomofo: 1 appends "_"
# after every syllable, 2 appends "_" after every word; any other value
# (such as the default 0) emits none.
BLANK_LEVEL = 0

# NOTE: relative path, so it is resolved against the current working directory.
resource_path = r"./diffrhythm/g2p"


def _require_resource(path, description):
    """Return *path* if it exists, otherwise abort the import.

    ``SystemExit`` is raised directly instead of printing and calling the
    ``site``-provided ``exit()`` helper: the message goes to stderr and the
    process ends with a non-zero status instead of reporting success.
    """
    if not os.path.exists(path):
        raise SystemExit(
            "Incorrect path for {}: {}, please check...".format(description, path)
        )
    return path


# Polyphonic-character class list. generate_poly_lexicon is not defined in this
# file; presumably it comes from the wildcard import of front_utils.
poly_all_class_path = _require_resource(
    os.path.join(resource_path, "sources", "g2p_chinese_model", "polychar.txt"),
    "polyphonic character class dictionary",
)
poly_dict = generate_poly_lexicon(poly_all_class_path)

# Directory handed to BertPolyPredict as the model location.
g2pw_poly_model_path = _require_resource(
    os.path.join(resource_path, "sources", "g2p_chinese_model"),
    "g2pw polyphonic character model",
)

json_file_path = _require_resource(
    os.path.join(resource_path, "sources", "g2p_chinese_model", "polydict.json"),
    "g2pw id to pinyin dictionary",
)

jsonr_file_path = _require_resource(
    os.path.join(resource_path, "sources", "g2p_chinese_model", "polydict_r.json"),
    "g2pw pinyin to id dictionary",
)

# Argument order is (model dir, pinyin->id json, id->pinyin json).
g2pw_poly_predict = BertPolyPredict(
    g2pw_poly_model_path, jsonr_file_path, json_file_path
)
|
|
|
|
|
""" |
|
Text cleaning: lookup tables and lexicons
|
""" |
|
|
|
# Ordered (compiled pattern, replacement) pairs, one per Latin letter a-z.
# The patterns are case-insensitive; latin_to_bopomofo applies them in list order.
# NOTE(review): the replacement strings look like mis-decoded UTF-8 (mojibake),
# presumably bopomofo spellings of the letter names. The literals for "b" and
# "w" were split across two lines in the damaged source and are rejoined here.
# Restore all of them from the upstream file before relying on this table.
_latin_to_bopomofo = [
    (re.compile("%s" % x[0], re.IGNORECASE), x[1])
    for x in [
        ("a", "γΛ"),
        ("b", "γγ§Λ"),
        ("c", "γγ§Λ"),
        ("d", "γγ§Λ"),
        ("e", "γ§Λ"),
        ("f", "γΛγγ¨Λ"),
        ("g", "γγ§Λ"),
        ("h", "γΛγγ©Λ"),
        ("i", "γΛ"),
        ("j", "γγΛ"),
        ("k", "γγΛ"),
        ("l", "γΛγΛ"),
        ("m", "γΛγγ¨Λ"),
        ("n", "γ£Λ"),
        ("o", "γ‘Λ"),
        ("p", "γγ§Λ"),
        ("q", "γγ§γ‘Λ"),
        ("r", "γΛ"),
        ("s", "γΛγΛ"),
        ("t", "γγ§Λ"),
        ("u", "γ§γ‘Λ"),
        ("v", "γ¨γ§Λ"),
        ("w", "γγΛγγ¨Λγγ§γ‘Λ"),
        ("x", "γΛγγ¨ΛγΛ"),
        ("y", "γ¨γΛ"),
        ("z", "γγΛ"),
    ]
]
|
|
|
|
|
# Ordered (compiled pattern, replacement) pairs applied one after another by
# bopomofo_to_ipa. Order matters: every substitution runs on the output of the
# previous ones, and the multi-symbol patterns are listed before the
# single-symbol ones.
# NOTE(review): both the patterns and the replacements look like mis-decoded
# UTF-8 (mojibake), presumably bopomofo symbols, tone marks and IPA. Many
# distinct keys are now visually identical, and two literals were split across
# lines in the damaged source (rejoined here). Restore the table from the
# upstream file before relying on it.
_bopomofo_to_ipa = [
    (re.compile("%s" % x[0]), x[1])
    for x in [
        ("γγ", "pβΌwo"),
        ("γγ", "pΚ°wo"),
        ("γγ", "mwo"),
        ("γγ", "fwo"),
        ("γ§γ’", "|jΙn"),
        ("γ©γ’", "|Ι₯Γ¦n"),
        ("γ§γ£", "|in"),
        ("γ©γ£", "|Ι₯n"),
        ("γ§γ₯", "|iΕ"),
        ("γ¨γ₯", "|ΚΕ"),
        ("γ©γ₯", "|jΚΕ"),

        ("γ§γ", "|ia"),
        ("γ§γ", "|iΙ"),
        ("γ§γ ", "|iΙΚ"),
        ("γ§γ‘", "|ioΚ"),
        ("γ§γ€", "|iΙΕ"),
        ("γ¨γ", "|ua"),
        ("γ¨γ", "|uo"),
        ("γ¨γ", "|uaΙͺ"),
        ("γ¨γ", "|ueΙͺ"),
        ("γ¨γ’", "|uan"),
        ("γ¨γ£", "|uΙn"),
        ("γ¨γ€", "|uΙΕ"),
        ("γ©γ", "|Ι₯Ι"),

        ("γ", "pβΌ"),
        ("γ", "pΚ°"),
        ("γ", "m"),
        ("γ", "f"),
        ("γ", "tβΌ"),
        ("γ", "tΚ°"),
        ("γ", "n"),
        ("γ", "l"),
        ("γ", "kβΌ"),
        ("γ", "kΚ°"),
        ("γ", "x"),
        ("γ", "tΚβΌ"),
        ("γ", "tΚΚ°"),
        ("γ", "Κ"),
        ("γ", "ts`βΌ"),
        ("γ", "ts`Κ°"),
        ("γ", "s`"),
        ("γ", "ΙΉ`"),
        ("γ", "tsβΌ"),
        ("γ", "tsΚ°"),
        ("γ", "|s"),
        ("γ", "|a"),
        ("γ", "|o"),
        ("γ", "|Ι"),
        ("γ", "|Ι"),
        ("γ", "|aΙͺ"),
        ("γ", "|eΙͺ"),
        ("γ ", "|ΙΚ"),
        ("γ‘", "|oΚ"),
        ("γ’", "|an"),
        ("γ£", "|Ιn"),
        ("γ€", "|ΙΕ"),
        ("γ₯", "|ΙΕ"),
        ("γ¦", "ΙΙΉ"),
        ("γ§", "|i"),
        ("γ¨", "|u"),
        ("γ©", "|Ι₯"),
        ("Λ", "β|"),
        ("Λ", "β|"),
        ("Λ", "ββ|"),
        ("Λ", "β|"),
        ("Λ", "|"),
    ]
]
|
# Words that er_sandhi must leave untouched even though they end in the
# character it looks for.
# NOTE(review): the literals look like mis-decoded UTF-8 (mojibake) and two of
# them are now visually identical; restore them from the upstream file.
must_not_er_words = {"ε₯³εΏ", "θεΏ", "η·εΏ", "ε°εΏ", "ε°εΏ"}
|
|
|
|
|
# Word-level pronunciation lexicon, resolved through huggingface_hub
# (hf_hub_download returns the local path of the requested file).
chinese_lexicon_path = hf_hub_download(
    repo_id="ASLP-lab/DiffRhythm",
    filename="diffrhythm/g2p/sources/chinese_lexicon.txt",
    repo_type="space",
)

# word -> pinyin string; chinese_to_bopomofo splits the value on spaces and
# reads the last character of each syllable as the tone key.
word_pinyin_dict = {}
with open(chinese_lexicon_path, "r", encoding="utf-8") as fread:
    # Stream the file instead of materialising it with readlines(); the
    # ``with`` block closes the handle, so no explicit close() is needed.
    for txt in fread:
        txt = txt.strip()
        if not txt:
            # A blank line (e.g. a trailing newline) would otherwise break
            # the two-field unpack below.
            continue
        # Malformed non-blank lines still raise ValueError, as before.
        word, pinyin = txt.split("\t")
        word_pinyin_dict[word] = pinyin
|
|
|
# pinyin syllable -> bopomofo spelling, one tab-separated pair per line.
# Lookups in chinese_to_bopomofo use the syllable without its trailing tone
# character as the key.
pinyin_2_bopomofo_dict = {}
with open(
    r"./diffrhythm/g2p/sources/pinyin_2_bpmf.txt", "r", encoding="utf-8"
) as fread:
    # The ``with`` block closes the file; iterate it directly rather than
    # loading every line with readlines().
    for txt in fread:
        txt = txt.strip()
        if not txt:
            # Skip blank lines instead of failing the two-field unpack.
            continue
        # Malformed non-blank lines still raise ValueError, as before.
        pinyin, bopomofo = txt.split("\t")
        pinyin_2_bopomofo_dict[pinyin] = bopomofo
|
|
|
# Tone key (last character of a pinyin syllable) -> mark appended to the
# bopomofo spelling; key "1" appends nothing, and "0" and "5" are both accepted.
# NOTE(review): the mark literals look like mis-decoded UTF-8 (mojibake) and
# are now visually identical; restore them from the upstream file.
tone_dict = {
    "0": "Λ",
    "5": "Λ",
    "1": "",
    "2": "Λ",
    "3": "Λ",
    "4": "Λ",
}
|
|
|
# Lookup table used by bpmf_to_pinyin, which queries it one character at a
# time. Each line of the file is "<value>\t<key>": the first column becomes
# the dictionary value and the second column the key.
bopomofos2pinyin_dict = {}
with open(
    r"./diffrhythm/g2p/sources/bpmf_2_pinyin.txt", "r", encoding="utf-8"
) as fread:
    # The ``with`` block closes the file; iterate it directly rather than
    # loading every line with readlines().
    for txt in fread:
        txt = txt.strip()
        if not txt:
            # Skip blank lines instead of failing the two-field unpack.
            continue
        # Malformed non-blank lines still raise ValueError, as before.
        v, k = txt.split("\t")
        bopomofos2pinyin_dict[k] = v
|
|
|
|
|
def bpmf_to_pinyin(text):
    """Convert "|"-separated bopomofo syllables to space-separated pinyin.

    Each syllable is transliterated character by character through the
    module-level ``bopomofos2pinyin_dict`` (characters missing from the table
    are ignored and syllables that end up empty are skipped). A syllable that
    does not end in one of "0"-"4" gets tone "1". The raw concatenation is
    then rewritten with the spelling rules below.

    Args:
        text: Bopomofo syllables joined by "|".

    Returns:
        The converted syllables joined by single spaces ("" if none survive).
    """
    pinyin_list = []
    for info in text.split("|"):
        pinyin = "".join(
            bopomofos2pinyin_dict[c] for c in info if c in bopomofos2pinyin_dict
        )
        if not pinyin:
            continue
        if pinyin[-1] not in "01234":
            pinyin += "1"

        # The tone is always the last character and no rule touches it, so
        # the spelling rules operate on the toneless stem.
        stem, tone = pinyin[:-1], pinyin[-1]

        # Whole-syllable spellings. (The original also tested the complete
        # string against "sh"; that could never match once a tone digit was
        # guaranteed, so the dead branch is dropped.)
        for raw, spelled in (
            ("ve", "yve"),
            ("sh", "shi"),
            ("s", "si"),
            ("c", "ci"),
            ("i", "yi"),
            ("iou", "you"),
            ("ien", "yin"),
        ):
            if stem == raw:
                stem = spelled

        # Finals that contract after an initial, or take a glide when bare.
        if stem.endswith("iou"):
            stem = stem[:-3] + "iu"
        if stem == "uei":
            stem = "wei"
        elif stem.endswith("uei"):
            stem = stem[:-3] + "ui"
        if stem == "uen":
            stem = "wen"
        elif stem.endswith("uen"):
            # Fix: the original compared against "uei" here, so "Xuen" was
            # never shortened to "Xun".
            stem = stem[:-3] + "un"
        if stem == "van":
            stem = "yuan"
        elif stem.endswith("van"):
            stem = stem[:-3] + "uan"
        # NOTE(review): a bare "ueng" becomes "ong" here, not "weng";
        # behavior kept as in the original - confirm this is intended.
        if stem.endswith("ueng"):
            stem = stem[:-4] + "ong"
        if stem == "veng":
            stem = "yong"
        if stem.endswith("veng"):
            stem = stem[:-4] + "iong"

        # NOTE(review): bare "ch" and "r" have no rule, unlike
        # "zh"/"sh"/"z"/"c"/"s"; confirm whether the lookup table already
        # covers them.
        for raw, spelled in (
            ("ieng", "ying"),
            ("u", "wu"),
            ("v", "yv"),
            ("ing", "ying"),
            ("z", "zi"),
            ("zh", "zhi"),
        ):
            if stem == raw:
                stem = spelled

        # Remaining leading glides, then the "ien" -> "in" contraction.
        if stem.startswith("u"):
            stem = "w" + stem[1:]
        if stem.startswith("i"):
            stem = "y" + stem[1:]
        stem = stem.replace("ien", "in")

        pinyin_list.append(stem + tone)
    return " ".join(pinyin_list)
|
|
|
|
|
|
|
def number_to_chinese(text):
    """Return *text* after running it through ``cn2an.transform`` in "an2cn" mode."""
    return cn2an.transform(text, "an2cn")
|
|
|
|
|
def normalization(text):
    """Normalize punctuation in *text* and drop characters outside the kept set.

    Steps, in order: a series of literal punctuation replacements, removal of
    all whitespace, removal of every character that is not in the allow-list
    (U+4E00-U+9FFF, "_", and the listed punctuation), and finally stripping of
    whitespace around punctuation.

    NOTE(review): the non-ASCII literals below look like mis-decoded UTF-8
    (mojibake), and several search strings are now visually identical. Restore
    them from the upstream file before relying on this function.
    """
    # Literal replacements towards ASCII punctuation and a single ellipsis form.
    text = text.replace("οΌ", ",")
    text = text.replace("γ", ".")
    text = text.replace("οΌ", "!")
    text = text.replace("οΌ", "?")
    text = text.replace("οΌ", ";")
    text = text.replace("οΌ", ":")
    text = text.replace("γ", ",")
    text = text.replace("β", "'")
    text = text.replace("β", "'")
    text = text.replace("β―", "β¦")
    text = text.replace("Β·Β·Β·", "β¦")
    text = text.replace("γ»γ»γ»", "β¦")
    text = text.replace("...", "β¦")
    # All whitespace is removed here, so the \s parts of the two patterns
    # below can no longer match anything.
    text = re.sub(r"\s+", "", text)
    # Keep only the allow-listed characters.
    text = re.sub(r"[^\u4e00-\u9fff\s_,\.\?!;:\'β¦]", "", text)
    text = re.sub(r"\s*([,\.\?!;:\'β¦])\s*", r"\1", text)
    return text
|
|
|
|
|
def change_tone(bopomofo: str, tone: str) -> str:
    """Return *bopomofo* with *tone* as its final mark.

    If the last character is one of the listed marks it is replaced by *tone*;
    otherwise *tone* is appended. An empty *bopomofo* raises ``IndexError``.

    NOTE(review): the mark literal looks like mis-decoded UTF-8 (mojibake);
    restore it from the upstream file.
    """
    if bopomofo[-1] not in "ΛΛΛΛ":
        bopomofo = bopomofo + tone
    else:
        bopomofo = bopomofo[:-1] + tone
    return bopomofo
|
|
|
|
|
def er_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    """Retone the last syllable of a multi-character word with the "er" ending.

    Applies only when *word* has more than one character, ends in the target
    character, and is not listed in ``must_not_er_words``. *bopomofos* is
    modified in place and also returned.

    NOTE(review): the character and tone literals look like mis-decoded UTF-8
    (mojibake); restore them from the upstream file.
    """
    if len(word) > 1 and word[-1] == "εΏ" and word not in must_not_er_words:
        bopomofos[-1] = change_tone(bopomofos[-1], "Λ")
    return bopomofos
|
|
|
|
|
def bu_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    """Adjust the tone of the target ("bu") character inside *word*.

    *bopomofos* is modified in place and also returned.

    NOTE(review): the character, syllable and tone literals look like
    mis-decoded UTF-8 (mojibake), and the syllable literal in the third branch
    was split across two lines in the damaged source (rejoined here). Restore
    them from the upstream file.
    """
    valid_char = set(word)
    if len(valid_char) == 1 and "δΈ" in valid_char:
        # The word consists only of the target character: leave it alone.
        pass
    elif word in ["δΈε"]:
        # Explicit exception word: leave it alone.
        pass
    elif len(word) == 3 and word[1] == "δΈ" and bopomofos[1][:-1] == "γγ¨":
        # Three-character word with the target in the middle: replace the
        # middle syllable's last character (its tone mark).
        bopomofos[1] = bopomofos[1][:-1] + "Λ"
    else:
        for i, char in enumerate(word):
            # Retone the target when the next syllable exists, is non-empty
            # and ends with the listed tone mark.
            if (
                i + 1 < len(bopomofos)
                and char == "δΈ"
                and i + 1 < len(word)
                and 0 < len(bopomofos[i + 1])
                and bopomofos[i + 1][-1] == "Λ"
            ):
                bopomofos[i] = bopomofos[i][:-1] + "Λ"
    return bopomofos
|
|
|
|
|
def yi_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    """Adjust the tone of the target ("yi") character inside *word*.

    *bopomofos* is modified in place and also returned.

    NOTE(review): every non-ASCII literal here looks like mis-decoded UTF-8
    (mojibake); several are now visually identical and two list entries were
    split across lines in the damaged source (rejoined here). Restore them
    from the upstream file. The indentation was reconstructed as well; see the
    note on the first ``return``.
    """
    punc = "οΌοΌοΌγοΌοΌββββ':,;.?!()οΌοΌ{}γγ[]-~`γ "
    # Branch 1: the word contains the target plus at least one other character
    # for which str.isnumeric() is true.
    if word.find("δΈ") != -1 and any(
        [item.isnumeric() for item in word if item != "δΈ"]
    ):
        for i in range(len(word)):
            if (
                i == 0
                and word[0] == "δΈ"
                and len(word) > 1
                and word[1]
                not in [
                    "ιΆ",
                    "δΈ",
                    "δΊ",
                    "δΈ",
                    "ε",
                    "δΊ",
                    "ε",
                    "δΈ",
                    "ε«",
                    "δΉ",
                    "ε",
                ]
            ):
                # NOTE(review): this guards len(bopomofos[0]) but then indexes
                # bopomofos[1][-1]; the guard probably meant bopomofos[1].
                if len(bopomofos[0]) > 0 and bopomofos[1][-1] in ["Λ", "Λ"]:
                    bopomofos[0] = change_tone(bopomofos[0], "Λ")
                else:
                    bopomofos[0] = change_tone(bopomofos[0], "Λ")
            elif word[i] == "δΈ":
                # Any other occurrence gets the empty tone mark.
                bopomofos[i] = change_tone(bopomofos[i], "")
        # NOTE(review): placed after the loop. The damaged source does not
        # show whether it sat inside the loop body - confirm against upstream.
        return bopomofos
    elif len(word) == 3 and word[1] == "δΈ" and word[0] == word[-1]:
        # Same character on both sides of the target.
        bopomofos[1] = change_tone(bopomofos[1], "Λ")
    elif word.startswith("第δΈ"):
        bopomofos[1] = change_tone(bopomofos[1], "")
    elif word.startswith("δΈζ") or word.startswith("δΈζ₯") or word.startswith("δΈε·"):
        bopomofos[0] = change_tone(bopomofos[0], "")
    else:
        for i, char in enumerate(word):
            if char == "δΈ" and i + 1 < len(word):
                # The tone depends on the mark that ends the next syllable.
                if (
                    len(bopomofos) > i + 1
                    and len(bopomofos[i + 1]) > 0
                    and bopomofos[i + 1][-1] in {"Λ"}
                ):
                    bopomofos[i] = change_tone(bopomofos[i], "Λ")
                else:
                    # Left unchanged when the next character is punctuation.
                    if word[i + 1] not in punc:
                        bopomofos[i] = change_tone(bopomofos[i], "Λ")
                    else:
                        pass
    return bopomofos
|
|
|
|
|
def merge_bu(seg: List) -> List:
    """Attach each standalone marker token to the word that follows it.

    A marker followed by an ordinary word is merged into that word, exactly as
    before. A marker that has nothing to merge with (it is the last token, or
    the next token is another marker) is now kept as its own entry. The
    previous implementation dropped it silently, which lost a character of the
    input and shifted every later character offset computed from the segment
    lengths.

    NOTE(review): the marker literal looks like mis-decoded UTF-8 (mojibake),
    presumably the "bu" character; restore it from the upstream file.

    Args:
        seg: Word segments in order.

    Returns:
        A new list of segments; *seg* itself is not modified.
    """
    new_seg = []
    pending = ""  # a marker token still waiting for the word after it
    for word in seg:
        if word == "δΈ":
            if pending:
                # The previous marker had nothing to merge with: keep it.
                new_seg.append(pending)
            pending = word
        else:
            new_seg.append(pending + word)
            pending = ""
    if pending:
        # Trailing marker with no following word.
        new_seg.append(pending)
    return new_seg
|
|
|
|
|
def merge_er(seg: List) -> List:
    """Glue a standalone target token onto the segment before it.

    A target token in first position has nothing to attach to and is kept as
    its own segment. Returns a new list.

    NOTE(review): the literal looks like mis-decoded UTF-8 (mojibake),
    presumably the "er" character; restore it from the upstream file.
    """
    new_seg = []
    for i, word in enumerate(seg):
        if i - 1 >= 0 and word == "εΏ":
            new_seg[-1] = new_seg[-1] + seg[i]
        else:
            new_seg.append(word)
    return new_seg
|
|
|
|
|
def merge_yi(seg: List) -> List:
    """Merge the target ("yi") token with its neighbours in three passes.

    Pass 1 folds "X <target> X" into a single segment. Pass 2 copies the list
    unchanged. Pass 3 glues a standalone target onto the segment after it.
    Returns a new list.

    NOTE(review): the literal looks like mis-decoded UTF-8 (mojibake);
    restore it from the upstream file.
    """
    new_seg = []

    # Pass 1: "X <target> X" -> one segment.
    for i, word in enumerate(seg):
        if (
            i - 1 >= 0
            and word == "δΈ"
            and i + 1 < len(seg)
            and seg[i - 1] == seg[i + 1]
        ):
            # NOTE(review): new_seg is indexed with a position taken from
            # seg; after an earlier merge or skip the two lists no longer
            # line up - confirm whether new_seg[-1] was intended.
            if i - 1 < len(new_seg):
                new_seg[i - 1] = new_seg[i - 1] + "δΈ" + new_seg[i - 1]
            else:
                new_seg.append(word)
                new_seg.append(seg[i + 1])
        else:
            # Skip the second X of an "X <target> X" triple.
            if i - 2 >= 0 and seg[i - 1] == "δΈ" and seg[i - 2] == word:
                continue
            else:
                new_seg.append(word)
    seg = new_seg
    new_seg = []
    isnumeric_flag = False
    # Pass 2: both branches append the word, so this only copies the list;
    # isnumeric_flag is set but has no effect on the result.
    for i, word in enumerate(seg):
        if all([item.isnumeric() for item in word]) and not isnumeric_flag:
            isnumeric_flag = True
            new_seg.append(word)
        else:
            new_seg.append(word)
    seg = new_seg
    new_seg = []

    # Pass 3: a segment that is exactly the target absorbs the next segment.
    for i, word in enumerate(seg):
        if new_seg and new_seg[-1] == "δΈ":
            new_seg[-1] = new_seg[-1] + word
        else:
            new_seg.append(word)
    return new_seg
|
|
|
|
|
|
|
def chinese_to_bopomofo(text_short, sentence):
    """Convert *text_short* to a "|"-separated string of bopomofo syllables.

    The text is segmented with jieba, the segments are regrouped by
    merge_yi / merge_bu / merge_er, each word is converted to syllables, tone
    adjustments are applied, and the results are joined with "|". Words
    without any character in U+4E00-U+9FFF are passed through unchanged.

    Args:
        text_short: The text to convert; it is also the context handed to the
            polyphone predictor.
        sentence: Not used by this function.

    NOTE(review): the tone-mark literals look like mis-decoded UTF-8
    (mojibake) and are now visually identical; restore them from the upstream
    file. The indentation was reconstructed from the damaged source.
    """

    words = jieba.lcut(text_short, cut_all=False)
    words = merge_yi(words)
    words = merge_bu(words)
    words = merge_er(words)
    text = ""

    # Running offset of the current word inside text_short; assumes the
    # segments still add up to text_short after the merge steps.
    char_index = 0
    for word in words:
        bopomofos = []
        if word in word_pinyin_dict and word not in poly_dict:
            # Whole-word lexicon hit that is not listed as polyphonic.
            pinyin = word_pinyin_dict[word]
            for py in pinyin.split(" "):
                if py[:-1] in pinyin_2_bopomofo_dict and py[-1] in tone_dict:
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                else:
                    # Unknown syllable: the pypinyin result for the WHOLE
                    # word is appended, once per failing syllable, on top of
                    # anything already collected for this word.
                    bopomofos_lazy = lazy_pinyin(word, BOPOMOFO)
                    bopomofos += bopomofos_lazy
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
        else:
            # Character-by-character conversion.
            for i in range(len(word)):
                c = word[i]
                if c in poly_dict:
                    # Polyphonic character: ask the predictor, passing the
                    # full text and this character's offset.
                    poly_pinyin = g2pw_poly_predict.predict_process(
                        [text_short, char_index + i]
                    )[0]
                    # Drops two leading and one trailing character of the
                    # prediction; presumably wrapper characters - TODO confirm.
                    py = poly_pinyin[2:-1]
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                elif c in word_pinyin_dict:
                    py = word_pinyin_dict[c]
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                else:
                    # Unknown character: kept as-is.
                    bopomofos.append(c)
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
        if BLANK_LEVEL == 2:
            bopomofos.append("_")
        char_index += len(word)

        # Tone adjustment for three-character words whose first, second and
        # last syllables all end with the same listed mark.
        # NOTE(review): this APPENDS a mark, whereas the two-character rule
        # below REPLACES the last character - confirm which is intended.
        if (
            len(word) == 3
            and bopomofos[0][-1] == "Λ"
            and bopomofos[1][-1] == "Λ"
            and bopomofos[-1][-1] == "Λ"
        ):
            bopomofos[0] = bopomofos[0] + "Λ"
            bopomofos[1] = bopomofos[1] + "Λ"
        if len(word) == 2 and bopomofos[0][-1] == "Λ" and bopomofos[-1][-1] == "Λ":
            bopomofos[0] = bopomofos[0][:-1] + "Λ"
        bopomofos = bu_sandhi(word, bopomofos)
        bopomofos = yi_sandhi(word, bopomofos)
        bopomofos = er_sandhi(word, bopomofos)
        if not re.search("[\u4e00-\u9fff]", word):
            # No CJK character in the word: emit it verbatim.
            text += "|" + word
            continue
        for i in range(len(bopomofos)):
            # A syllable that still ends in a bare symbol from
            # U+3105-U+3129 gets an explicit mark appended.
            bopomofos[i] = re.sub(r"([\u3105-\u3129])$", r"\1Λ", bopomofos[i])
        if text != "":
            text += "|"
        text += "|".join(bopomofos)
    return text
|
|
|
|
|
|
|
def latin_to_bopomofo(text):
    """Apply every substitution in ``_latin_to_bopomofo`` to *text*, in table order."""
    for pattern, bopomofo in _latin_to_bopomofo:
        text = pattern.sub(bopomofo, text)
    return text
|
|
|
|
|
|
|
def bopomofo_to_ipa(text):
    """Apply every substitution in ``_bopomofo_to_ipa`` to *text*, in table order."""
    for pattern, ipa in _bopomofo_to_ipa:
        text = pattern.sub(ipa, text)
    return text
|
|
|
|
|
def _chinese_to_ipa(text, sentence):
    """Run the full pipeline on one string and return "|"-separated phonemes.

    Order: number_to_chinese -> normalization -> chinese_to_bopomofo ->
    latin_to_bopomofo -> bopomofo_to_ipa, followed by the regex clean-up below.
    *sentence* is only forwarded to chinese_to_bopomofo.

    NOTE(review): the non-ASCII characters inside the patterns look like
    mis-decoded UTF-8 (mojibake); restore them from the upstream file.
    """
    text = number_to_chinese(text.strip())
    text = normalization(text)
    text = chinese_to_bopomofo(text, sentence)

    text = latin_to_bopomofo(text)
    text = bopomofo_to_ipa(text)
    # Insert an extra symbol after the matched consonants when they are
    # directly followed by the listed marks or by the end of the string.
    text = re.sub("([sΙΉ]`[βΌΚ°]?)([βββ ]+|$)", r"\1ΙΉ\2", text)
    text = re.sub("([s][βΌΚ°]?)([βββ ]+|$)", r"\1ΙΉ\2", text)
    # Drop a leading "|" and every character outside the allow-list.
    text = re.sub(r"^\||[^\w\s_,\.\?!;:\'β¦\|ββββΌΚ°`]", "", text)
    # Make each punctuation mark its own "|"-delimited token.
    text = re.sub(r"([,\.\?!;:\'β¦])", r"|\1|", text)
    # Collapse runs of "|" and remove trailing ones.
    text = re.sub(r"\|+", "|", text)
    text = text.rstrip("|")
    return text
|
|
|
|
|
|
|
def chinese_to_ipa(text, sentence, text_tokenizer):
    """Convert a string, or each string of an iterable, with ``_chinese_to_ipa``.

    Args:
        text: A single string or an iterable of strings.
        sentence: Forwarded unchanged to ``_chinese_to_ipa``.
        text_tokenizer: Not used by this function; kept so existing callers
            keep working.

    Returns:
        The converted string when *text* is a string, otherwise a list with
        one converted string per item.
    """
    # isinstance also accepts str subclasses; the previous exact type
    # comparison sent those to the iterable branch, which converted them one
    # character at a time.
    if isinstance(text, str):
        return _chinese_to_ipa(text, sentence)
    return [_chinese_to_ipa(t, sentence) for t in text]
|
|