Spaces:
Running
on
Zero
Running
on
Zero
# Copyright (c) 2024 Amphion. | |
# | |
# This source code is licensed under the MIT license found in the | |
# LICENSE file in the root directory of this source tree. | |
import io, re, os, sys, time, argparse, pdb, json | |
from io import StringIO | |
from typing import Optional | |
import numpy as np | |
import traceback | |
import pyopenjtalk | |
from pykakasi import kakasi | |
# Punctuation symbols the pipeline keeps: they survive `replace_punctuation`,
# are returned as-is by `kata2phoneme_list`, and get tone 0 in `align_tones`.
punctuation = [",", ".", "!", "?", ":", ";", "'", "…"]
# Phoneme -> IPA table. Each entry is "<phoneme> <IPA>" with a leading space.
# `JapanesePhoneConverter.__init__` strips the entry, splits it on the first
# space and removes all whitespace from the IPA part, so " j d ʑ" is stored
# as {"j": "dʑ"}.
jp_xphone2ipa = [
    " a a",
    " i i",
    " u ɯ",
    " e e",
    " o o",
    " a: aː",
    " i: iː",
    " u: ɯː",
    " e: eː",
    " o: oː",
    " k k",
    " s s",
    " t t",
    " n n",
    " h ç",
    " f ɸ",
    " m m",
    " y j",
    " r ɾ",
    " w ɰᵝ",
    " N ɴ",
    " g g",
    " j d ʑ",
    " z z",
    " d d",
    " b b",
    " p p",
    " q q",
    " v v",
    " : :",
    " by b j",
    " ch t ɕ",
    " dy d e j",
    " ty t e j",
    " gy g j",
    " gw g ɯ",
    " hy ç j",
    " ky k j",
    " kw k ɯ",
    " my m j",
    " ny n j",
    " py p j",
    " ry ɾ j",
    " sh ɕ",
    " ts t s ɯ",
]
# Core katakana mora table: (katakana mora, consonant or None, vowel).
# It is the only source for `mora_phonemes_to_mora_kata`, and together with
# `_mora_list_additional` it feeds `mora_kata_to_mora_phonemes`.
# Order does not affect katakana matching: `kata2phoneme_list` sorts the keys
# longest-first before building its regex.
_mora_list_minimum: list[tuple[str, Optional[str], str]] = [
    ("ヴォ", "v", "o"),
    ("ヴェ", "v", "e"),
    ("ヴィ", "v", "i"),
    ("ヴァ", "v", "a"),
    ("ヴ", "v", "u"),
    ("ン", None, "N"),
    ("ワ", "w", "a"),
    ("ロ", "r", "o"),
    ("レ", "r", "e"),
    ("ル", "r", "u"),
    ("リョ", "ry", "o"),
    ("リュ", "ry", "u"),
    ("リャ", "ry", "a"),
    ("リェ", "ry", "e"),
    ("リ", "r", "i"),
    ("ラ", "r", "a"),
    ("ヨ", "y", "o"),
    ("ユ", "y", "u"),
    ("ヤ", "y", "a"),
    ("モ", "m", "o"),
    ("メ", "m", "e"),
    ("ム", "m", "u"),
    ("ミョ", "my", "o"),
    ("ミュ", "my", "u"),
    ("ミャ", "my", "a"),
    ("ミェ", "my", "e"),
    ("ミ", "m", "i"),
    ("マ", "m", "a"),
    ("ポ", "p", "o"),
    ("ボ", "b", "o"),
    ("ホ", "h", "o"),
    ("ペ", "p", "e"),
    ("ベ", "b", "e"),
    ("ヘ", "h", "e"),
    ("プ", "p", "u"),
    ("ブ", "b", "u"),
    ("フォ", "f", "o"),
    ("フェ", "f", "e"),
    ("フィ", "f", "i"),
    ("ファ", "f", "a"),
    ("フ", "f", "u"),
    ("ピョ", "py", "o"),
    ("ピュ", "py", "u"),
    ("ピャ", "py", "a"),
    ("ピェ", "py", "e"),
    ("ピ", "p", "i"),
    ("ビョ", "by", "o"),
    ("ビュ", "by", "u"),
    ("ビャ", "by", "a"),
    ("ビェ", "by", "e"),
    ("ビ", "b", "i"),
    ("ヒョ", "hy", "o"),
    ("ヒュ", "hy", "u"),
    ("ヒャ", "hy", "a"),
    ("ヒェ", "hy", "e"),
    ("ヒ", "h", "i"),
    ("パ", "p", "a"),
    ("バ", "b", "a"),
    ("ハ", "h", "a"),
    ("ノ", "n", "o"),
    ("ネ", "n", "e"),
    ("ヌ", "n", "u"),
    ("ニョ", "ny", "o"),
    ("ニュ", "ny", "u"),
    ("ニャ", "ny", "a"),
    ("ニェ", "ny", "e"),
    ("ニ", "n", "i"),
    ("ナ", "n", "a"),
    ("ドゥ", "d", "u"),
    ("ド", "d", "o"),
    ("トゥ", "t", "u"),
    ("ト", "t", "o"),
    ("デョ", "dy", "o"),
    ("デュ", "dy", "u"),
    ("デャ", "dy", "a"),
    # ("デェ", "dy", "e"),
    ("ディ", "d", "i"),
    ("デ", "d", "e"),
    ("テョ", "ty", "o"),
    ("テュ", "ty", "u"),
    ("テャ", "ty", "a"),
    ("ティ", "t", "i"),
    ("テ", "t", "e"),
    ("ツォ", "ts", "o"),
    ("ツェ", "ts", "e"),
    ("ツィ", "ts", "i"),
    ("ツァ", "ts", "a"),
    ("ツ", "ts", "u"),
    ("ッ", None, "q"),  # changed from "cl" to "q"
    ("チョ", "ch", "o"),
    ("チュ", "ch", "u"),
    ("チャ", "ch", "a"),
    ("チェ", "ch", "e"),
    ("チ", "ch", "i"),
    ("ダ", "d", "a"),
    ("タ", "t", "a"),
    ("ゾ", "z", "o"),
    ("ソ", "s", "o"),
    ("ゼ", "z", "e"),
    ("セ", "s", "e"),
    ("ズィ", "z", "i"),
    ("ズ", "z", "u"),
    ("スィ", "s", "i"),
    ("ス", "s", "u"),
    ("ジョ", "j", "o"),
    ("ジュ", "j", "u"),
    ("ジャ", "j", "a"),
    ("ジェ", "j", "e"),
    ("ジ", "j", "i"),
    ("ショ", "sh", "o"),
    ("シュ", "sh", "u"),
    ("シャ", "sh", "a"),
    ("シェ", "sh", "e"),
    ("シ", "sh", "i"),
    ("ザ", "z", "a"),
    ("サ", "s", "a"),
    ("ゴ", "g", "o"),
    ("コ", "k", "o"),
    ("ゲ", "g", "e"),
    ("ケ", "k", "e"),
    ("グヮ", "gw", "a"),
    ("グ", "g", "u"),
    ("クヮ", "kw", "a"),
    ("ク", "k", "u"),
    ("ギョ", "gy", "o"),
    ("ギュ", "gy", "u"),
    ("ギャ", "gy", "a"),
    ("ギェ", "gy", "e"),
    ("ギ", "g", "i"),
    ("キョ", "ky", "o"),
    ("キュ", "ky", "u"),
    ("キャ", "ky", "a"),
    ("キェ", "ky", "e"),
    ("キ", "k", "i"),
    ("ガ", "g", "a"),
    ("カ", "k", "a"),
    ("オ", None, "o"),
    ("エ", None, "e"),
    ("ウォ", "w", "o"),
    ("ウェ", "w", "e"),
    ("ウィ", "w", "i"),
    ("ウ", None, "u"),
    ("イェ", "y", "e"),
    ("イ", None, "i"),
    ("ア", None, "a"),
]
# Extra katakana -> phoneme entries, same (katakana, consonant or None, vowel)
# layout as `_mora_list_minimum`. They are used only for the katakana ->
# phoneme direction (`mora_kata_to_mora_phonemes`); the reverse map is built
# from `_mora_list_minimum` alone.
_mora_list_additional: list[tuple[str, Optional[str], str]] = [
    ("ヴョ", "by", "o"),
    ("ヴュ", "by", "u"),
    ("ヴャ", "by", "a"),
    ("ヲ", None, "o"),
    ("ヱ", None, "e"),
    ("ヰ", None, "i"),
    ("ヮ", "w", "a"),
    ("ョ", "y", "o"),
    ("ュ", "y", "u"),
    ("ヅ", "z", "u"),
    ("ヂ", "j", "i"),
    ("ヶ", "k", "e"),
    ("ャ", "y", "a"),
    ("ォ", None, "o"),
    ("ェ", None, "e"),
    ("ゥ", None, "u"),
    ("ィ", None, "i"),
    ("ァ", None, "a"),
]
# Phoneme string -> katakana mora, built from the core table only.
# Example: "vo" -> "ヴォ", "a" -> "ア"
mora_phonemes_to_mora_kata: dict[str, str] = {
    f"{onset or ''}{nucleus}": mora for mora, onset, nucleus in _mora_list_minimum
}
# Katakana mora -> (consonant or None, vowel), built from both tables.
# Example: "ヴォ" -> ("v", "o"), "ア" -> (None, "a")
mora_kata_to_mora_phonemes: dict[str, tuple[Optional[str], str]] = {
    mora: (onset, nucleus)
    for mora, onset, nucleus in [*_mora_list_minimum, *_mora_list_additional]
}
# Replacement table used by `replace_punctuation`: maps full-width / CJK
# punctuation, bullets, ellipsis variants and newlines onto the small symbol
# set listed in `punctuation`.
#
# The full-width keys are written as \uXXXX escapes on purpose. Written as
# literal characters they are easily folded to their ASCII look-alikes by text
# normalisation, which turns each entry into a no-op identity mapping and makes
# `replace_punctuation` delete full-width punctuation instead of converting it.
rep_map = {
    "\uff1a": ":",  # full-width colon
    "\uff1b": ";",  # full-width semicolon
    "\uff0c": ",",  # full-width comma
    "。": ".",
    "\uff01": "!",  # full-width exclamation mark
    "\uff1f": "?",  # full-width question mark
    "\n": ".",
    "\uff0e": ".",  # full-width full stop
    "⋯": "…",
    "···": "…",
    "・・・": "…",
    "·": ",",
    "・": ",",
    "•": ",",
    "、": ",",
    "$": ".",
    "‘": "'",
    "’": "'",
    # Double quotes, brackets of every kind, dashes and tildes are
    # intentionally NOT mapped here (the entries were disabled upstream).
    # They are not in the allowed character set of `replace_punctuation`,
    # so they are removed from the text rather than converted.
}
def _numeric_feature_by_regex(regex, s): | |
match = re.search(regex, s) | |
if match is None: | |
return -50 | |
return int(match.group(1)) | |
def replace_punctuation(text: str) -> str:
    """Normalise punctuation via `rep_map`, then drop every unsupported character.

    After the `rep_map` substitution, only these characters are kept:
    hiragana, katakana, CJK ideographs (plus the iteration mark U+3005),
    half-width and full-width Latin letters, Greek letters, and the symbols
    listed in `punctuation`. Everything else is removed.
    """
    # Step 1: rewrite every symbol that has an entry in rep_map.
    symbol_re = re.compile("|".join(map(re.escape, rep_map)))
    normalized = symbol_re.sub(lambda m: rep_map[m.group()], text)

    # Step 2: build the whitelist character class and delete whatever is outside it.
    allowed = (
        # hiragana, katakana, kanji
        r"\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF\u3400-\u4DBF\u3005"
        # half-width Latin letters (upper and lower case)
        r"\u0041-\u005A\u0061-\u007A"
        # full-width Latin letters (upper and lower case)
        r"\uFF21-\uFF3A\uFF41-\uFF5A"
        # Greek letters
        r"\u0370-\u03FF\u1F00-\u1FFF"
        # the punctuation symbols the pipeline understands
        + "".join(punctuation)
    )
    return re.sub(r"[^" + allowed + r"]+", "", normalized)
def fix_phone_tone(phone_tone_list: list[tuple[str, int]]) -> list[tuple[str, int]]:
    """
    Normalise the tone (accent) values of one accent phrase to the range 0/1.

    Example: [(a, 0), (i, -1), (u, -1)] -> [(a, 1), (i, 0), (u, 0)]

    Args:
        phone_tone_list: (phoneme, tone) pairs of a single accent phrase.

    Returns:
        The input list itself when it is empty or already uses only {0} or
        {0, 1}; otherwise a new list with tones {-1, 0} shifted up to {0, 1}.

    Raises:
        ValueError: if the set of tones is anything else. This includes a
            phrase whose only tone is non-zero, which used to be an `assert`
            and was therefore skipped under `python -O`.
    """
    # An empty phrase has nothing to normalise.
    if not phone_tone_list:
        return phone_tone_list

    tone_values = {tone for _, tone in phone_tone_list}
    if tone_values in ({0}, {0, 1}):
        return phone_tone_list
    if tone_values == {-1, 0}:
        # The phrase only ever fell from its starting pitch: shift it up by one.
        return [(letter, tone + 1) for letter, tone in phone_tone_list]
    raise ValueError(f"Unexpected tone values: {tone_values}")
def fix_phone_tone_wplen(phone_tone_list, word_phone_length_list):
    """
    Split (phone, tone) pairs into parallel lists while folding ":" marks.

    Walks `phone_tone_list` one word at a time, consuming
    `word_phone_length_list[w_idx]` entries per word. A ":" entry that follows
    at least one collected phone is never emitted on its own; it is appended to
    the previous phone unless that phone already ends with ":".

    Args:
        phone_tone_list: sequence of (phone, tone) pairs.
        word_phone_length_list: number of entries belonging to each word.

    Returns:
        (phones, tones, w_p_len): the phones, the tones converted to strings,
        and the per-word lengths after the ":" adjustments.

    NOTE(review): no caller is visible in this file.
    NOTE(review): the `== "▁"` test below compares a whole entry with a string,
    but the same entry is unpacked as a (phone, tone) pair a few lines later —
    confirm what a word-separator entry is supposed to look like.
    """
    phones = []
    tones = []
    w_p_len = []
    p_len = len(phone_tone_list)
    idx = 0  # position in phone_tone_list
    w_idx = 0  # position in word_phone_length_list
    while idx < p_len:
        # offset accumulates the (negative) length correction for this word
        offset = 0
        if phone_tone_list[idx] == "▁":
            w_p_len.append(w_idx + 1)
        curr_w_p_len = word_phone_length_list[w_idx]
        for i in range(curr_w_p_len):
            p, t = phone_tone_list[idx]
            if p == ":" and len(phones) > 0:
                # fold the mark into the previous phone, at most once per phone
                if phones[-1][-1] != ":":
                    phones[-1] += ":"
                    offset -= 1
            else:
                phones.append(p)
                tones.append(str(t))
            idx += 1
            # the last word may be shorter than its declared length
            if idx >= p_len:
                break
        w_p_len.append(curr_w_p_len + offset)
        w_idx += 1
    # print(w_p_len)
    return phones, tones, w_p_len
def g2phone_tone_wo_punct(prosodies) -> list[tuple[str, int]]:
    """
    Turn a prosody symbol sequence into (phoneme, tone) pairs with tone 0 or 1.

    All non-phoneme symbols are consumed and none of them (not even pauses)
    appear in the result; punctuation is re-inserted later by `align_tones()`.
    The phoneme "cl" is emitted as "q"; "N" is left as is.

    Recognised symbols:
        "^"        start of sentence (must be first), ignored
        "$", "?"   end of sentence (must be last), closes the accent phrase
        "_", "#"   pause / accent phrase border, closes the accent phrase
        "[", "]"   pitch rise / pitch fall inside the current accent phrase

    The phrase still open when the input ends is flushed as well, so the
    final accent phrase is kept even when no terminating "$" / "?" is
    present (as is the case for the output of `frontend2phoneme`). A boundary
    symbol with no phonemes before it is skipped instead of being passed to
    `fix_phone_tone`.

    Example: "こんにちは、世界ー。。元気?!" ->
    [('k', 0), ('o', 0), ('N', 1), ('n', 1), ('i', 1), ('ch', 1), ('i', 1), ('w', 1), ('a', 1), ('s', 1), ('e', 1), ('k', 0), ('a', 0), ('i', 0), ('i', 0), ('g', 1), ('e', 1), ('N', 0), ('k', 0), ('i', 0)]
    """
    result: list[tuple[str, int]] = []
    current_phrase: list[tuple[str, int]] = []
    current_tone = 0
    last_accent = ""
    for i, letter in enumerate(prosodies):
        if letter == "^":
            # Sentence-start marker: nothing to emit.
            assert i == 0, "Unexpected ^"
        elif letter in ("$", "?", "_", "#"):
            # End of an accent phrase: normalise its tones to 0/1 and store it.
            if current_phrase:
                result.extend(fix_phone_tone(current_phrase))
            # Sentence-end markers may only appear last (a question inside the
            # text shows up as "_").
            if letter in ("$", "?"):
                assert i == len(prosodies) - 1, f"Unexpected {letter}"
            # Start the next phrase from tone 0; a phrase that only falls ends
            # up with negative tones, which `fix_phone_tone` shifts back up.
            current_phrase = []
            current_tone = 0
            last_accent = ""
        elif letter == "[":
            # Pitch rise; a repeated "[" without a "]" in between is ignored.
            if last_accent != letter:
                current_tone += 1
            last_accent = letter
        elif letter == "]":
            # Pitch fall; a repeated "]" without a "[" in between is ignored.
            if last_accent != letter:
                current_tone -= 1
            last_accent = letter
        else:
            # Ordinary phoneme.
            if letter == "cl":
                letter = "q"
            current_phrase.append((letter, current_tone))
    # Flush the phrase that was still open when the input ended.
    if current_phrase:
        result.extend(fix_phone_tone(current_phrase))
    return result
def handle_long(sep_phonemes: list[list[str]]) -> list[list[str]]:
    """Replace every long-vowel mark "ー" with ":" in each word's phoneme list.

    The inner lists are modified in place and the same outer list is returned.
    Every inner list must be non-empty, because its first element is inspected.
    """
    for word in sep_phonemes:
        # A leading mark is handled first.
        if word[0] == "ー":
            word[0] = ":"
        # Then any mark left anywhere in the word.
        for pos, phoneme in enumerate(word):
            if phoneme == "ー":
                word[pos] = ":"
    return sep_phonemes
def handle_long_word(sep_phonemes: list[list[str]]) -> list[list[str]]:
    """
    Resolve long-vowel marks "ー" and interleave the words with "▁" separators.

    A "ー" is replaced by the sound it lengthens:
      - inside a word: the last character of the preceding phoneme;
      - at the start of a word: the last phoneme of the previous word;
      - at the very start of the input (no preceding phoneme at all): ":".
        Previously this case indexed `sep_phonemes[-1]` and copied the last
        phoneme of the *last* word instead.

    The inner lists are modified in place.

    Returns:
        A new list of the form [word0, "▁", word1, "▁", ...]; note that the
        separators are plain strings, not lists, and that a separator also
        follows the final word.
    """
    res = []
    for i, word in enumerate(sep_phonemes):
        for j, phoneme in enumerate(word):
            if phoneme != "ー":
                continue
            if j > 0:
                # Earlier positions are already resolved, so consecutive marks chain.
                word[j] = word[j - 1][-1]
            elif i > 0 and sep_phonemes[i - 1]:
                word[j] = sep_phonemes[i - 1][-1]
            else:
                # Nothing precedes the mark: use the length symbol, as handle_long does.
                word[j] = ":"
        res.append(word)
        res.append("▁")
    return res
def align_tones(
    phones_with_punct: list[str], phone_tone_list: list[tuple[str, int]]
) -> list[tuple[str, int]]:
    """
    Attach tones to a phoneme sequence that still contains punctuation.

    `phone_tone_list` is consumed in order: a phone equal to the next pending
    entry takes that entry's tone. Symbols in `punctuation`, the "▁" separator
    and anything left once `phone_tone_list` is exhausted get tone 0.

    Example ("…私は、、そう思う。"):
        phones_with_punct:
            [".", ".", ".", "w", "a", "t", "a", "sh", "i", "w", "a", ",", ",", "s", "o", "o", "o", "m", "o", "u", "."]
        phone_tone_list:
            [("w", 0), ("a", 0), ("t", 1), ("a", 1), ("sh", 1), ("i", 1), ("w", 1), ("a", 1), ("s", 0), ("o", 0), ("o", 1), ("o", 1), ("m", 1), ("o", 1), ("u", 0)]
        returns:
            [(".", 0), (".", 0), (".", 0), ("w", 0), ("a", 0), ("t", 1), ("a", 1), ("sh", 1), ("i", 1), ("w", 1), ("a", 1), (",", 0), (",", 0), ("s", 0), ("o", 0), ("o", 1), ("o", 1), ("m", 1), ("o", 1), ("u", 0), (".", 0)]

    Raises:
        ValueError: if a phone neither matches the next pending entry nor is
            punctuation / "▁". Diagnostic state is printed first.
    """
    result: list[tuple[str, int]] = []
    tone_index = 0
    pending = len(phone_tone_list)
    for phone in phones_with_punct:
        # Phone matches the next pending entry: take its tone and advance.
        if tone_index < pending and phone == phone_tone_list[tone_index][0]:
            result.append((phone, phone_tone_list[tone_index][1]))
            tone_index += 1
            continue
        # Leftover input after the tone list ran out, or a punctuation /
        # separator symbol: tone 0, and the tone list is not advanced.
        if tone_index >= pending or phone in punctuation or phone == "▁":
            result.append((phone, 0))
            continue
        print(f"phones: {phones_with_punct}")
        print(f"phone_tone_list: {phone_tone_list}")
        print(f"result: {result}")
        print(f"tone_index: {tone_index}")
        print(f"phone: {phone}")
        raise ValueError(f"Unexpected phone: {phone}")
    return result
def kata2phoneme_list(text: str) -> list[str]:
    """
    Convert a katakana string into a list of phoneme symbols.

    Notes:
    - If `text` is one of the `punctuation` symbols, it is returned unchanged
      as a one-element list.
    - Long-vowel marks "ー" at the very beginning stay "ー" (they are resolved
      later by the long-vowel handling step).
    - A "ー" inside the text repeats the phoneme character right before it.

    Examples:
        `ーーソーナノカーー` -> ["ー", "ー", "s", "o", "o", "n", "a", "n", "o", "k", "a", "a", "a"]
        `?` -> ["?"]

    Raises:
        ValueError: if `text` is not punctuation and contains anything outside
            the katakana block U+30A0-U+30FF.
    """
    if text in punctuation:
        return [text]
    # Everything else must consist of katakana only ("ー" included).
    if re.fullmatch(r"[\u30A0-\u30FF]+", text) is None:
        raise ValueError(f"Input must be katakana only: {text}")

    def _expand_mora(match) -> str:
        # " <consonant> <vowel>", or just " <vowel>" when there is no consonant.
        onset, nucleus = mora_kata_to_mora_phonemes[match.group()]
        return f" {nucleus}" if onset is None else f" {onset} {nucleus}"

    def _expand_long(match) -> str:
        # One extra copy of the preceding character per long-vowel mark.
        base, marks = match.group(1), match.group(2)
        return base + (" " + base) * len(marks)

    # Longest keys first so two-character morae win over their first character.
    longest_first = sorted(mora_kata_to_mora_phonemes, key=len, reverse=True)
    mora_pattern = "|".join(re.escape(key) for key in longest_first)
    spaced = re.sub(mora_pattern, _expand_mora, text)
    # Handle the long-vowel mark "ー".
    spaced = re.sub(r"(\w)(ー*)", _expand_long, spaced)
    return spaced.strip().split(" ")
def frontend2phoneme(labels, drop_unvoiced_vowels=False):
    """
    Extract phonemes and prosody symbols from full-context labels.

    For each label the current phoneme is read from between the first "-" and
    the following "+". "sil" labels are skipped without emitting anything (no
    "^", "$" or "?" is produced), "pau" becomes "_", and every other phoneme is
    emitted followed by at most one prosody symbol:
        "#"  accent phrase border
        "]"  pitch falling
        "["  pitch rising

    Args:
        labels: sequence of label strings.
        drop_unvoiced_vowels: when true, the upper-case vowels "A", "E", "I",
            "O", "U" are lower-cased, i.e. treated as normal vowels.

    Returns:
        List of phoneme and prosody symbol strings.
    """
    N = len(labels)
    phones = []
    for n, lab_curr in enumerate(labels):
        # current phoneme
        p3 = re.search(r"\-(.*?)\+", lab_curr).group(1)
        # deal unvoiced vowels as normal vowels
        if drop_unvoiced_vowels and p3 in "AEIOU":
            p3 = p3.lower()
        # silence at the beginning, the end, or anywhere else: emit nothing
        if p3 == "sil":
            continue
        if p3 == "pau":
            phones.append("_")
            continue
        phones.append(p3)
        # accent type and position info (forward or backward)
        a1 = _numeric_feature_by_regex(r"/A:([0-9\-]+)\+", lab_curr)
        a2 = _numeric_feature_by_regex(r"\+(\d+)\+", lab_curr)
        a3 = _numeric_feature_by_regex(r"\+(\d+)/", lab_curr)
        # number of mora in accent phrase
        f1 = _numeric_feature_by_regex(r"/F:(\d+)_", lab_curr)
        # Look ahead one label. If the sequence does not end with sil/pau
        # there is no next label; use -50, the value
        # `_numeric_feature_by_regex` returns when a feature is missing, so
        # no prosody symbol is emitted.
        if n + 1 < N:
            a2_next = _numeric_feature_by_regex(r"\+(\d+)\+", labels[n + 1])
        else:
            a2_next = -50
        # accent phrase border
        if a3 == 1 and a2_next == 1 and p3 in "aeiouAEIOUNcl":
            phones.append("#")
        # pitch falling
        elif a1 == 0 and a2_next == a2 + 1 and a2 != f1:
            phones.append("]")
        # pitch rising
        elif a2 == 1 and a2_next == 2:
            phones.append("[")
    return phones
class JapanesePhoneConverter(object):
    """
    Convert Japanese text into an IPA phoneme string with per-character tone marks.

    pyopenjtalk provides the morphological analysis and the full-context
    labels; pykakasi supplies a fallback reading for words whose OpenJTalk
    pronunciation comes back as "、".
    """

    def __init__(self, lexicon_path=None, ipa_dict_path=None):
        """
        Build the phoneme -> IPA dictionary from `jp_xphone2ipa`.

        `lexicon_path` and `ipa_dict_path` are accepted for interface
        compatibility but are not read.
        """
        # Each table entry is "<phoneme> <ipa symbols>"; the IPA side is stored
        # with all whitespace removed.
        self.ipa_dict = {}
        for curr_line in jp_xphone2ipa:
            k, v = curr_line.strip().split(" ", 1)
            self.ipa_dict[k] = re.sub(r"\s", "", v)
        self.japan_JH2K = kakasi()
        # Tone digits "6" / "7" (tone 0 / 1 plus 6) are rendered as "_" / "¯".
        self.table = {ord(f): ord(t) for f, t in zip("67", "_¯")}

    def text2sep_kata(self, parsed) -> tuple[list[str], list[str], list]:
        """
        Split the OpenJTalk analysis result into words and their readings.

        Example:
            `私はそう思う!って感じ?` ->
            ["私", "は", "そう", "思う", "!", "って", "感じ", "?"], ["ワタシ", "ワ", "ソー", "オモウ", "!", "ッテ", "カンジ", "?"]

        Args:
            parsed: list of dicts with at least the keys "string" and "pron"
                (the OpenJTalk analysis result). Entries may be modified in
                place: "pron" is overwritten when the kakasi fallback is used.

        Returns:
            (sep_text, sep_kata, fix_parsed): the normalised words, their
            readings (katakana or a single symbol), and the `parsed` entries
            that were kept. Words that become empty after normalisation are
            dropped from all three lists.
        """
        sep_text: list[str] = []
        sep_kata: list[str] = []
        fix_parsed = []
        i = 0
        while i <= len(parsed) - 1:
            # word: the surface string; yomi: its reading with the devoicing
            # sign "’" removed.
            yomi = parsed[i]["pron"]
            tmp_parsed = parsed[i]
            # An iteration mark in the next entry is merged into this word and
            # the entry itself is skipped.
            if i != len(parsed) - 1 and parsed[i + 1]["string"] in [
                "々",
                "ゝ",
                "ヽ",
                "ゞ",
                "ヾ",
                "゛",
            ]:
                word = parsed[i]["string"] + parsed[i + 1]["string"]
                i += 1
            else:
                word = parsed[i]["string"]
            word, yomi = replace_punctuation(word), yomi.replace("’", "")
            # Expected values of `yomi` at this point:
            # - ordinary word -> katakana reading, possibly with "ー" (e.g. `アー`)
            # - word starting with "ー" -> e.g. `ーラー` or `ーーー`
            # - punctuation, whitespace, unreadable characters -> `、`
            # - question mark -> a question mark that comes back full-width
            # Only `、` and the question mark need special handling.
            assert yomi != "", f"Empty yomi: {word}"
            if yomi == "、":
                # `word` is already normalised, so it is either one of the
                # known symbols (or empty) or something OpenJTalk cannot read.
                if word not in (
                    ".",
                    ",",
                    "!",
                    "'",
                    "-",
                    "?",
                    ":",
                    ";",
                    "…",
                    "",
                ):
                    # Unreadable by pyopenjtalk: fall back to the kakasi
                    # reading and store it as the pronunciation used for
                    # label generation.
                    # NOTE(review): only the first chunk returned by kakasi is
                    # used — confirm a single token never yields several.
                    word = self.japan_JH2K.convert(word)[0]["kana"]
                    tmp_parsed["pron"] = word
                # For symbols the reading is the symbol itself.
                yomi = word
            elif yomi in ("?", "\uff1f"):
                # Accept both the ASCII and the full-width (U+FF1F) question
                # mark as reading; downstream only understands ASCII "?".
                assert word == "?", f"yomi `?` comes from: {word}"
                yomi = "?"
            if word == "":
                i += 1
                continue
            sep_text.append(word)
            sep_kata.append(yomi)
            fix_parsed.append(tmp_parsed)
            i += 1
        return sep_text, sep_kata, fix_parsed

    def getSentencePhone(self, sentence, blank_mode=True, phoneme_mode=False):
        """
        Convert one sentence into phonemes, tones and auxiliary strings.

        Args:
            sentence: input text. Surrounding whitespace and "'" are stripped
                and all inner whitespace is removed before analysis.
            blank_mode: unused; kept for interface compatibility.
            phoneme_mode: unused; kept for interface compatibility.

        Returns:
            dict with the keys
                "jp_p": IPA string, words separated by a space;
                "jp_t": one tone mark per character ("_" / "¯" for phonemes,
                        "0" for punctuation and unknown phones, " " between
                        words);
                "jp_l": "2" for every character, " " between words;
                "jp_normed_text": the kept surface strings joined by spaces;
                "jp_pron_text": the kept readings joined by spaces.
            "jp_t" and "jp_l" are aligned with the IPA string before the final
            "|" clean-up applied to "jp_p".
        """
        sentence = sentence.strip().strip("'")
        sentence = re.sub(r"\s+", "", sentence)

        frontend_text = pyopenjtalk.run_frontend(sentence)
        try:
            frontend_text = pyopenjtalk.estimate_accent(frontend_text)
        except Exception:
            # Accent estimation is best effort; continue with the plain
            # frontend result when it fails.
            pass

        # sep_kata: per-word katakana readings; frontend_text: the entries kept.
        _sep_text, sep_kata, frontend_text = self.text2sep_kata(frontend_text)
        # Per-word phoneme lists interleaved with "▁" word separators.
        sep_phonemes = handle_long_word([kata2phoneme_list(i) for i in sep_kata])
        pron_text = [x["pron"].strip().replace("’", "") for x in frontend_text]
        normed_text = [x["string"].strip() for x in frontend_text]

        prosodys = pyopenjtalk.make_label(frontend_text)
        prosodys = frontend2phoneme(prosodys, drop_unvoiced_vowels=True)
        # (phoneme, tone) pairs with all punctuation removed.
        phone_tone_list_wo_punct = g2phone_tone_wo_punct(prosodys)

        # Flatten the words into one phoneme sequence that still contains the
        # punctuation, then drop the trailing "▁" separator.
        phone_w_punct: list[str] = []
        for word_phones in sep_phonemes:
            phone_w_punct.extend(word_phones)
        phone_w_punct = phone_w_punct[:-1]

        # Use the punctuation-free accent information to assign a tone to
        # every element of the punctuated sequence.
        phone_tone_list = align_tones(phone_w_punct, phone_tone_list_wo_punct)

        ipa_parts = []
        tone_parts = []
        for p, t in phone_tone_list:
            if p in self.ipa_dict:
                curr_p = self.ipa_dict[p]
                ipa_parts.append(curr_p)
                # One tone digit per IPA character: "6" for tone 0, "7" for tone 1.
                tone_parts.append(str(t + 6) * len(curr_p))
            elif p in punctuation:
                ipa_parts.append(p)
                tone_parts.append("0")
            elif p == "▁":
                ipa_parts.append(p)
                tone_parts.append(" ")
            else:
                # Unknown phone: report it and emit a "|" placeholder.
                print(p, t)
                ipa_parts.append("|")
                tone_parts.append("0")

        jp_p = "".join(ipa_parts).replace("▁", " ")
        jp_t = "".join(tone_parts).translate(self.table)
        jp_l = "".join(" " if t == " " else "2" for t in jp_t)
        assert len(jp_p) == len(jp_t) and len(jp_p) == len(jp_l)

        jp_item = {}
        jp_item["jp_p"] = jp_p.replace("| |", "|").rstrip("|")
        jp_item["jp_t"] = jp_t
        jp_item["jp_l"] = jp_l
        jp_item["jp_normed_text"] = " ".join(normed_text)
        jp_item["jp_pron_text"] = " ".join(pron_text)
        return jp_item
# Module-level converter shared by all calls to `japanese_to_ipa`.
jpc = JapanesePhoneConverter()


def japanese_to_ipa(text, text_tokenizer):
    """
    Convert Japanese text to IPA.

    Args:
        text: a single string, or an iterable of strings.
        text_tokenizer: unused; kept for interface compatibility.

    Returns:
        The IPA string for a single string (including `str` subclasses, which
        were previously iterated character by character), otherwise a list
        with one IPA string per item.
    """
    if isinstance(text, str):
        return jpc.getSentencePhone(text)["jp_p"]
    return [jpc.getSentencePhone(t)["jp_p"] for t in text]