Respair committed on
Commit
c660ff0
·
verified ·
1 Parent(s): e40e676

Update Utils/phonemize/cotlet_utils.py

Browse files
Files changed (1) hide show
  1. Utils/phonemize/cotlet_utils.py +34 -12
Utils/phonemize/cotlet_utils.py CHANGED
@@ -886,7 +886,6 @@ def random_sym_fix_no_space(text):
886
 
887
  return text
888
 
889
-
890
  spaces = dict([
891
 
892
  ("ɯ ɴ","ɯɴ"),
@@ -980,12 +979,13 @@ spaces = dict([
980
  ("niːɕiki","ni iɕiki"),
981
  ("anitɕaɴ","niːtɕaɴ"),
982
  ("daiːtɕi","dai itɕi"),
983
- ("niːta","ni ita"),
984
- ("niːrɯ","ni irɯ"),
985
  ("a—","aː"),
986
- ("waːis","wa ais"),
987
- ("waːiɕ","wa aiɕ"),
988
- ("aːt","a at"),
 
989
  ("waːʔ", "wa aʔ"),
990
 
991
  ("naɴ sono","nani sono"),
@@ -1002,10 +1002,21 @@ spaces = dict([
1002
  ("niːw","ni iw"),
1003
  ("niːkɯ","ni ikɯ"),
1004
  ("de—","de e"),
1005
- ("aːj","aː aj"),
1006
- ("aːɽ","a aɽ"),
1007
- ("aːr","a ar"),
1008
- ("gaːn","ga an"),
 
 
 
 
 
 
 
 
 
 
 
1009
  ("ɕiːk ","ɕi ik"),
1010
  ("ɕijoː neɴ","ɕoɯneɴ"),
1011
  ("aːna","a ana"),
@@ -1014,11 +1025,22 @@ spaces = dict([
1014
  ])
1015
 
1016
 
1017
-
1018
  def random_space_fix(text):
1019
  orig = text
1020
 
1021
  for k, v in spaces.items():
1022
  text = text.replace(k, v)
1023
 
1024
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
886
 
887
  return text
888
 
 
889
  spaces = dict([
890
 
891
  ("ɯ ɴ","ɯɴ"),
 
979
  ("niːɕiki","ni iɕiki"),
980
  ("anitɕaɴ","niːtɕaɴ"),
981
  ("daiːtɕi","dai itɕi"),
982
+ (" niːta"," ni ita"),
983
+ (" niːrɯ"," ni irɯ"),
984
  ("a—","aː"),
985
+ ("iːki","i iki"),
986
+ (" waːis"," wa ais"),
987
+ (" waːiɕ"," wa aiɕ"),
988
+ # ("aːt","a at"),
989
  ("waːʔ", "wa aʔ"),
990
 
991
  ("naɴ sono","nani sono"),
 
1002
  ("niːw","ni iw"),
1003
  ("niːkɯ","ni ikɯ"),
1004
  ("de—","de e"),
1005
+ (" aːj"," aː aj"),
1006
+ (" aːɽ"," a aɽ"),
1007
+ (" aːr"," a ar"),
1008
+ (" gaːn"," ga an"),
1009
+
1010
+ (" gaːɽɯ "," ga aɽɯ "),
1011
+ (" waːɽɯ "," wa aɽɯ "),
1012
+ (" gaːrɯ "," ga aɽɯ "),
1013
+ (" waːrɯ "," wa aɽɯ "),
1014
+
1015
+ (" gaːɽi"," ga aɽi"),
1016
+ (" waːɽi"," wa aɽi"),
1017
+ (" gaːri"," ga aɽi"),
1018
+ (" waːri"," wa aɽi"),
1019
+
1020
  ("ɕiːk ","ɕi ik"),
1021
  ("ɕijoː neɴ","ɕoɯneɴ"),
1022
  ("aːna","a ana"),
 
1025
  ])
1026
 
1027
 
 
1028
  def random_space_fix(text):
1029
  orig = text
1030
 
1031
  for k, v in spaces.items():
1032
  text = text.replace(k, v)
1033
 
1034
+ return fix_wagas(text)
1035
+
1036
def fix_wagas(text):
    """Split the long vowel in a trailing ``gaː``/``waː`` + ``ɽɯ``/``rɯ``/``ɽi``/``ri`` word.

    A listed word (for example ``gaːɽɯ``) is rewritten by replacing its
    ``ː`` with ``" a"`` (giving ``ga aɽɯ``). The word must stand alone
    between word boundaries and must NOT be followed, after optional
    whitespace, by another word character; occurrences followed by a
    further word are left untouched.

    Args:
        text: The string to process.

    Returns:
        The text with every qualifying occurrence rewritten.
    """
    pattern = r'\b(gaːɽɯ|waːɽɯ|gaːrɯ|waːrɯ|gaːɽi|waːɽi|gaːri|waːri)\b(?!\s*\w)'

    def replace_match(match):
        # Every alternative in the pattern contains 'ː', so no membership
        # check is needed before substituting it.
        return match.group(1).replace('ː', ' a')

    return re.sub(pattern, replace_match, text)