Update Utils/phonemize/cotlet_utils.py
Browse files- Utils/phonemize/cotlet_utils.py +34 -12
Utils/phonemize/cotlet_utils.py
CHANGED
@@ -886,7 +886,6 @@ def random_sym_fix_no_space(text):
|
|
886 |
|
887 |
return text
|
888 |
|
889 |
-
|
890 |
spaces = dict([
|
891 |
|
892 |
("ɯ ɴ","ɯɴ"),
|
@@ -980,12 +979,13 @@ spaces = dict([
|
|
980 |
("niːɕiki","ni iɕiki"),
|
981 |
("anitɕaɴ","niːtɕaɴ"),
|
982 |
("daiːtɕi","dai itɕi"),
|
983 |
-
("niːta","ni ita"),
|
984 |
-
("niːrɯ","ni irɯ"),
|
985 |
("a—","aː"),
|
986 |
-
("
|
987 |
-
("
|
988 |
-
("
|
|
|
989 |
("waːʔ", "wa aʔ"),
|
990 |
|
991 |
("naɴ sono","nani sono"),
|
@@ -1002,10 +1002,21 @@ spaces = dict([
|
|
1002 |
("niːw","ni iw"),
|
1003 |
("niːkɯ","ni ikɯ"),
|
1004 |
("de—","de e"),
|
1005 |
-
("aːj","aː aj"),
|
1006 |
-
("aːɽ","a aɽ"),
|
1007 |
-
("aːr","a ar"),
|
1008 |
-
("gaːn","ga an"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1009 |
("ɕiːk ","ɕi ik"),
|
1010 |
("ɕijoː neɴ","ɕoɯneɴ"),
|
1011 |
("aːna","a ana"),
|
@@ -1014,11 +1025,22 @@ spaces = dict([
|
|
1014 |
])
|
1015 |
|
1016 |
|
1017 |
-
|
1018 |
def random_space_fix(text):
    """Apply every substitution in the module-level ``spaces`` table to *text*.

    Each key of ``spaces`` is replaced by its value with ``str.replace``.
    The substitutions run one after another on the running result, in the
    table's iteration order, so a later rule sees the output of earlier ones.

    Args:
        text: The string to rewrite.

    Returns:
        The string after all substitutions have been applied.
    """
    for wrong, fixed in spaces.items():
        text = text.replace(wrong, fixed)

    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
886 |
|
887 |
return text
|
888 |
|
|
|
889 |
spaces = dict([
|
890 |
|
891 |
("ɯ ɴ","ɯɴ"),
|
|
|
979 |
("niːɕiki","ni iɕiki"),
|
980 |
("anitɕaɴ","niːtɕaɴ"),
|
981 |
("daiːtɕi","dai itɕi"),
|
982 |
+
(" niːta"," ni ita"),
|
983 |
+
(" niːrɯ"," ni irɯ"),
|
984 |
("a—","aː"),
|
985 |
+
("iːki","i iki"),
|
986 |
+
(" waːis"," wa ais"),
|
987 |
+
(" waːiɕ"," wa aiɕ"),
|
988 |
+
# ("aːt","a at"),
|
989 |
("waːʔ", "wa aʔ"),
|
990 |
|
991 |
("naɴ sono","nani sono"),
|
|
|
1002 |
("niːw","ni iw"),
|
1003 |
("niːkɯ","ni ikɯ"),
|
1004 |
("de—","de e"),
|
1005 |
+
(" aːj"," aː aj"),
|
1006 |
+
(" aːɽ"," a aɽ"),
|
1007 |
+
(" aːr"," a ar"),
|
1008 |
+
(" gaːn"," ga an"),
|
1009 |
+
|
1010 |
+
(" gaːɽɯ "," ga aɽɯ "),
|
1011 |
+
(" waːɽɯ "," wa aɽɯ "),
|
1012 |
+
(" gaːrɯ "," ga aɽɯ "),
|
1013 |
+
(" waːrɯ "," wa aɽɯ "),
|
1014 |
+
|
1015 |
+
(" gaːɽi"," ga aɽi"),
|
1016 |
+
(" waːɽi"," wa aɽi"),
|
1017 |
+
(" gaːri"," ga aɽi"),
|
1018 |
+
(" waːri"," wa aɽi"),
|
1019 |
+
|
1020 |
("ɕiːk ","ɕi ik"),
|
1021 |
("ɕijoː neɴ","ɕoɯneɴ"),
|
1022 |
("aːna","a ana"),
|
|
|
1025 |
])
|
1026 |
|
1027 |
|
|
|
1028 |
def random_space_fix(text):
    """Apply the ``spaces`` substitution table to *text*, then ``fix_wagas``.

    Each key of the module-level ``spaces`` mapping is replaced by its value
    with ``str.replace``. The substitutions run one after another on the
    running result, in the table's iteration order, so a later rule sees the
    output of earlier ones. The result is then passed through ``fix_wagas``.

    Args:
        text: The string to rewrite.

    Returns:
        The string after all table substitutions and ``fix_wagas``.
    """
    for wrong, fixed in spaces.items():
        text = text.replace(wrong, fixed)

    return fix_wagas(text)
|
1035 |
+
|
1036 |
+
def fix_wagas(text):
    """Split the listed ``ga``/``wa`` + long-vowel forms when no word follows.

    Matches any of ``gaːɽɯ``, ``waːɽɯ``, ``gaːrɯ``, ``waːrɯ``, ``gaːɽi``,
    ``waːɽi``, ``gaːri``, ``waːri`` as a whole word and rewrites the length
    mark ``ː`` to `` a`` (e.g. ``gaːɽɯ`` -> ``ga aɽɯ``). A match is only
    rewritten when it is NOT followed by optional whitespace and another word
    character, i.e. at the end of the text or directly before punctuation.

    NOTE(review): presumably this splits a particle that was fused with a
    following verb; confirm the linguistic intent with the author.

    Args:
        text: The string to rewrite.

    Returns:
        The string with every qualifying match split; other text is unchanged.
    """
    # \b ... \b       -> whole-word match only
    # (?!\s*\w)       -> reject matches that are followed by another word
    pattern = r'\b(gaːɽɯ|waːɽɯ|gaːrɯ|waːrɯ|gaːɽi|waːɽi|gaːri|waːri)\b(?!\s*\w)'

    def replace_match(match):
        # Every alternative in the pattern contains 'ː', so the replacement
        # always applies; no fallback branch is needed.
        return match.group(1).replace('ː', ' a')

    return re.sub(pattern, replace_match, text)
|