Spaces:
Running
Running
lojban ipa support
Browse files- app.py +3 -1
- lojban.py +354 -0
- styletts2importable.py +12 -4
app.py
CHANGED
|
@@ -139,7 +139,7 @@ with gr.Blocks() as vctk:
|
|
| 139 |
voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
|
| 140 |
lang =gr.Dropdown(
|
| 141 |
[
|
| 142 |
-
['English', 'en-us'],
|
| 143 |
['Czech (Non-native)', 'cs'],
|
| 144 |
['Danish (Non-native)', 'da'],
|
| 145 |
['Dutch (Non-native)', 'nl'],
|
|
@@ -157,6 +157,8 @@ with gr.Blocks() as vctk:
|
|
| 157 |
['Spanish (Non-native)', 'es'],
|
| 158 |
['Swedish (Non-native)', 'sv'],
|
| 159 |
['Turkish (Non-native)', 'tr'],
|
|
|
|
|
|
|
| 160 |
],
|
| 161 |
label="Language",
|
| 162 |
)
|
|
|
|
| 139 |
voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
|
| 140 |
lang =gr.Dropdown(
|
| 141 |
[
|
| 142 |
+
['English (US)', 'en-us'],
|
| 143 |
['Czech (Non-native)', 'cs'],
|
| 144 |
['Danish (Non-native)', 'da'],
|
| 145 |
['Dutch (Non-native)', 'nl'],
|
|
|
|
| 157 |
['Spanish (Non-native)', 'es'],
|
| 158 |
['Swedish (Non-native)', 'sv'],
|
| 159 |
['Turkish (Non-native)', 'tr'],
|
| 160 |
+
# artificial
|
| 161 |
+
['Lojban', 'jb'],
|
| 162 |
],
|
| 163 |
label="Language",
|
| 164 |
)
|
lojban.py
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# credits: gleki
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
from re import sub, compile
|
| 7 |
+
from itertools import islice
|
| 8 |
+
|
| 9 |
+
def krulermorna(text: str) -> str:
    """Rewrite Lojban text so every glide and diphthong is one character.

    Steps, in order:

    1. every "." is dropped;
    2. "u" / "i" directly followed by one of ``aeiouy`` becomes "w" / "ɩ";
    3. the remaining pairs "au", "ai", "ei", "oi" become "ḁ", "ą", "ę", "ǫ".

    The order matters: a glide is rewritten before the diphthong pass, so the
    "i" in "ia" can no longer pair with a preceding vowel.

    Args:
        text: Lojban text in its usual Latin spelling.

    Returns:
        The rewritten text; an empty string stays empty.
    """
    text = text.replace(".", "")
    text = sub(r"u([aeiouy])", r"w\1", text)
    text = sub(r"i([aeiouy])", r"ɩ\1", text)
    # Plain literal pairs: str.replace is enough, no regex needed.
    for pair, letter in (("au", "ḁ"), ("ai", "ą"), ("ei", "ę"), ("oi", "ǫ")):
        text = text.replace(pair, letter)
    return text
|
| 20 |
+
|
| 21 |
+
def krulermornaize(words: list[str]) -> list[str]:
    """Return a new list with `krulermorna` applied to each entry of *words*."""
    return list(map(krulermorna, words))
|
| 23 |
+
|
| 24 |
+
# Replacement table used by lojban2ipa_vits.
# Keys are regex fragments matched at the current position of a word that has
# already been through krulermorna(); values are the text emitted for a match.
# Longer keys are tried first; keys of equal length keep the order below.
ipa_vits = {
    # vowels
    "a$": "aː",
    "a": "aː",
    "e": "ɛː",
    "i": "iː",
    "o": "oː",
    "u": "ʊu",
    "y": "əː",
    # single-letter diphthongs produced by krulermorna()
    "ą": "aɪ",
    "ę": "ɛɪ",
    "ǫ": "ɔɪ",
    "ḁ": "aʊ",
    # "ɩ" glide + vowel
    "ɩa": "jaː",
    "ɩe": "jɛː",
    "ɩi": "jiː",
    "ɩo": "jɔː",
    "ɩu": "juː",
    "ɩy": "jəː",
    "ɩ": "j",
    # "w" glide + vowel
    "wa": "waː",
    "we": "wɛː",
    "wi": "wiː",
    "wo": "wɔː",
    "wu": "wuː",
    "wy": "wəː",
    "w": "w",
    # fricatives and the apostrophe
    "c": "ʃ",
    "j": "ʒ",
    "s": "s",
    "z": "z",
    "f": "f",
    "v": "v",
    "x": "hhh",
    "'": "h",
    # "r": doubled unless a vowel or the stress mark follows
    "r": "ɹ",
    "r(?![ˈaeiouyḁąęǫ])": "ɹɹ",
    # nasals; "nu" has dedicated entries with and without the stress mark
    "nˈu": "nˈʊuː",
    "nu": "nʊuː",
    "ng": "n.g",
    "n": "n",
    "m": "m",
    # remaining consonants
    "l": "l",
    "b": "b",
    "d": "d",
    "g": "ɡ",
    "k": "k",
    "p": "p",
    "t": "t",
    "h": "h",
}
|
| 84 |
+
|
| 85 |
+
# Replacement table used by lojban2ipa_nix.
# Same layout as the vits table: regex-fragment keys matched at the current
# position of a krulermorna()-processed word, longer keys tried first and
# keys of equal length kept in the order below.
ipa_nix = {
    # vowels
    "a$": "aː",
    "a": "aː",
    "e": "ɛː",
    "i": "iː",
    "o": "oː",
    "u": "ʊu",
    "y": "əː",
    # single-letter diphthongs produced by krulermorna()
    "ą": "aɪ",
    "ę": "ɛɪ",
    "ǫ": "ɔɪ",
    "ḁ": "aʊ",
    # "ɩ" glide + vowel
    "ɩa": "jaː",
    "ɩe": "jɛː",
    "ɩi": "jiː",
    "ɩo": "jɔː",
    "ɩu": "juː",
    "ɩy": "jəː",
    "ɩ": "j",
    # "w" glide + vowel
    "wa": "waː",
    "we": "wɛː",
    "wi": "wiː",
    "wo": "wɔː",
    "wu": "wuː",
    "wy": "wəː",
    "w": "w",
    # fricatives and the apostrophe; "gj" / "bj" get an inserted "ɪ"
    "c": "ʃ",
    "gj": "gɪʒ",
    "bj": "bɪʒ",
    "j": "ʒ",
    "s": "s",
    "z": "z",
    "f": "f",
    "v": "v",
    "x": "hh",
    "'": "h",
    # "r": lengthened unless a vowel or the stress mark follows
    "r": "ɹ",
    "r(?![ˈaeiouyḁąęǫ])": "ɹɹɹɪ",
    # nasals; "nu" has dedicated entries with and without the stress mark
    "nˈu": "nˈʊuː",
    "nu": "nʊuː",
    "ng": "ng",
    "n": "n",
    "m": "m",
    # remaining consonants
    "l": "l",
    "b": "b",
    "d": "d",
    "g": "ɡ",
    "k": "k",
    "p": "p",
    "t": "t",
    "h": "h",
}
|
| 146 |
+
|
| 147 |
+
# Letter sets as they appear after krulermorna(): the four diphthong
# placeholders, and those plus the six plain vowels.
_DIPHTHONG_LETTERS = "ąęǫḁ"
_VOWEL_LETTERS = "aeiouy" + _DIPHTHONG_LETTERS

# Matches one vowel letter.
vowel_pattern = compile("[" + _VOWEL_LETTERS + "]")
# Zero-width: matches just before a vowel letter (used to split words).
vowel_coming_pattern = compile("(?=[" + _VOWEL_LETTERS + "])")
# Zero-width: matches just before a diphthong placeholder.
diphthong_coming_pattern = compile("(?=[" + _DIPHTHONG_LETTERS + "])")
|
| 150 |
+
|
| 151 |
+
# Word groups the converters compare against, stored in krulermorna() form so
# they can be matched directly against already-converted input words.
question_words = [krulermorna(cmavo) for cmavo in ["ma", "mo", "xu"]]
starter_words = [krulermorna(cmavo) for cmavo in ["le", "lo", "lei", "loi"]]
terminator_words = [
    krulermorna(cmavo) for cmavo in ["kei", "ku'o", "vau", "li'u"]
]
|
| 154 |
+
|
| 155 |
+
def lojban2ipa(text: str, mode: str = 'vits') -> str:
    """Convert Lojban *text* to an IPA-like string using the chosen table.

    Args:
        text: Lojban text to convert.
        mode: ``'nix'`` selects `lojban2ipa_nix`; ``'vits'`` and any other
            value select `lojban2ipa_vits`. Defaults to ``'vits'``, which is
            the existing fallback, so the argument may be omitted.

    Returns:
        The string produced by the selected converter.
    """
    if mode == 'nix':
        return lojban2ipa_nix(text)
    # 'vits' and every unrecognised mode share the same converter.
    return lojban2ipa_vits(text)
|
| 161 |
+
|
| 162 |
+
def _compile_rules(table: dict) -> list:
    """Turn a replacement table into (start-anchored pattern, output) pairs.

    Pairs are ordered longest key first; keys of equal length keep the
    table's own order because the sort is stable.
    """
    ordered = sorted(table.items(), key=lambda item: len(item[0]), reverse=True)
    return [(compile("^" + key), ipa) for key, ipa in ordered]


def _mark_stress(word: str) -> tuple:
    """Insert the stress mark "ˈ" into *word* where the rules below apply.

    The word is split in front of every vowel letter. With at least two
    vowel-initial pieces, the mark goes before the second-to-last one
    (e.g. {klama}, {ni'o}). Otherwise, if the last piece starts with a
    diphthong letter and is preceded by a non-empty piece, the mark goes
    directly before that diphthong (e.g. {lau}, {coi}).

    Returns:
        ``(word, marked)`` where *marked* tells whether a mark was inserted.
    """
    pieces = vowel_coming_pattern.split(word)
    if len(pieces) < 2 or not pieces[-2]:
        return word, False
    before_last, last = pieces[-2], pieces[-1]
    head = "".join(pieces[:-2])
    if vowel_pattern.match(before_last) and vowel_pattern.match(last):
        return head + "ˈ" + before_last + last, True
    if diphthong_coming_pattern.match(last):
        return head + before_last + "ˈ" + last, True
    return word, False


def _transliterate(word: str, rules: list) -> str:
    """Rewrite *word* left to right using the first rule matching each position.

    Characters that no rule matches (such as the stress mark) are copied
    through unchanged.
    """
    out = []
    pos = 0
    while pos < len(word):
        rest = word[pos:]
        for pattern, ipa in rules:
            found = pattern.match(rest)
            if found:
                out.append(ipa)
                # Always advance at least one character.
                pos += max(len(found.group(0)), 1)
                break
        else:
            out.append(word[pos])
            pos += 1
    return "".join(out)


def _lojban2ipa(text: str, table: dict, mark_questions: bool) -> str:
    """Shared implementation behind `lojban2ipa_vits` and `lojban2ipa_nix`.

    Args:
        text: Lojban text; surrounding whitespace is stripped first.
        table: Replacement table (regex-fragment key -> output text).
        mark_questions: When true, "?" is appended after each question word.

    Returns:
        The converted string, or "" when nothing is left after stripping
        whitespace and dots.
    """
    text = krulermorna(text.strip())
    if not text:
        # Previously this fell through to text[-1] and raised IndexError.
        return ""

    # Sorted and compiled once per call rather than once per character.
    rules = _compile_rules(table)
    words = text.split(' ')
    rendered = []
    for index, word in enumerate(words):
        prefix, postfix = "", ""

        if word in question_words:
            if mark_questions:
                postfix = "?"
            prefix = " " + prefix

        if word in starter_words:
            prefix = " " + prefix

        if word in terminator_words:
            postfix = ", "

        if index == 0 or word in ("ni'o", "i"):
            prefix = ", " + prefix

        stressed_word, marked = _mark_stress(word)
        if marked:
            # Stressed words are followed by an extra space.
            postfix += " "

        # Separate from the previous word unless that word was a starter.
        if index == 0 or words[index - 1] not in starter_words:
            prefix = " " + prefix

        rendered.append(prefix + _transliterate(stressed_word, rules) + postfix)

    output = "".join(rendered).strip()
    output = sub(r" {2,}", " ", output)
    # Collapse consecutive commas into one.
    output = sub(r", ?(?=,)", "", output)

    # Close with a full stop when the converted input ends in a vowel letter.
    if vowel_pattern.match(text[-1]):
        output += "."

    return output


def lojban2ipa_vits(text: str) -> str:
    """Convert Lojban *text* using the `ipa_vits` table.

    Question words get a trailing "?". Empty or dot-only input returns "".
    """
    return _lojban2ipa(text, ipa_vits, mark_questions=True)


def lojban2ipa_nix(text: str) -> str:
    """Convert Lojban *text* using the `ipa_nix` table.

    Unlike `lojban2ipa_vits`, question words get no trailing "?".
    Empty or dot-only input returns "".
    """
    return _lojban2ipa(text, ipa_nix, mark_questions=False)
|
| 353 |
+
|
| 354 |
+
# print(lojban2ipa("ni'o le pa tirxu be me'e zo .teris. pu ki kansa le za'u pendo be le nei le ka xabju le foldi be loi spati"))
|
styletts2importable.py
CHANGED
|
@@ -136,6 +136,7 @@ sampler = DiffusionSampler(
|
|
| 136 |
)
|
| 137 |
|
| 138 |
LANG_NAMES = {
|
|
|
|
| 139 |
'en-us': 'english',
|
| 140 |
'cs': 'czech',
|
| 141 |
'da': 'danish',
|
|
@@ -169,10 +170,17 @@ def inference(text, ref_s, lang='en-us', alpha = 0.3, beta = 0.7, diffusion_step
|
|
| 169 |
if (ipa_sections is not None):
|
| 170 |
text = re.sub(regex, '[]', text, 0, re.MULTILINE)
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
# add the IPA back
|
| 178 |
if (ipa_sections is not None):
|
|
|
|
| 136 |
)
|
| 137 |
|
| 138 |
LANG_NAMES = {
|
| 139 |
+
# natural; supported by nltk
|
| 140 |
'en-us': 'english',
|
| 141 |
'cs': 'czech',
|
| 142 |
'da': 'danish',
|
|
|
|
| 170 |
if (ipa_sections is not None):
|
| 171 |
text = re.sub(regex, '[]', text, 0, re.MULTILINE)
|
| 172 |
|
| 173 |
+
if lang in LANG_NAMES:
|
| 174 |
+
local_phonemizer = phonemizer.backend.EspeakBackend(language=lang, preserve_punctuation=True, with_stress=True)
|
| 175 |
+
ps = local_phonemizer.phonemize([text])
|
| 176 |
+
ps = word_tokenize(ps[0], language=LANG_NAMES[lang])
|
| 177 |
+
ps = ' '.join(ps)
|
| 178 |
+
elif lang == 'jb':
|
| 179 |
+
# Lojban language
|
| 180 |
+
import lojban
|
| 181 |
+
ps = lojban.lojban2ipa(text, 'vits')
|
| 182 |
+
else:
|
| 183 |
+
ps = text
|
| 184 |
|
| 185 |
# add the IPA back
|
| 186 |
if (ipa_sections is not None):
|