Spaces:
Running
Running
import re | |
mapping = { | |
"n\u0303": "\xf1", | |
"g\u0306": "\u011f", | |
"i\u0307": "i", | |
"u\u0308": "\xfc", | |
"o\u0308": "\xf6", | |
"\xe7": "\u04ab", | |
"c\u0327": "\u04ab", | |
"s\u0327": "\u015f", | |
"a\u0302": "\xe2", | |
"w": "v", | |
"x": "ks" | |
} | |
zero = { | |
0: 'sıfır', | |
} | |
numbers_map = { | |
1: 'bir', | |
2: 'eki', | |
3: 'üç', | |
4: 'dört', | |
5: 'beş', | |
6: 'altı', | |
7: 'yedi', | |
8: 'sekiz', | |
9: 'doquz', | |
10: 'on', | |
20: 'yigirmi', | |
30: 'otuz', | |
40: 'qırq', | |
50: 'elli', | |
60: 'altmış', | |
70: 'yetmiş', | |
80: 'seksen', | |
90: 'doqsan', | |
100: 'yüz', | |
1000: 'biñ', | |
1_000_000: 'million', | |
1_000_000_000: 'milliard' | |
} | |
def spell_numbers(numbers: str) -> str: | |
numbers_map_with_zero = {**numbers_map,**zero} | |
for i in range(0, 10): | |
numbers = numbers.replace(str(i), numbers_map_with_zero[i] + ' ') | |
return numbers.strip() | |
def num2word(n): | |
if n in numbers_map: | |
return numbers_map[n] | |
elif n < 100: | |
tens = (n // 10) * 10 | |
units = n % 10 | |
if units == 0: | |
return '' | |
return (numbers_map[tens] + ' ' + numbers_map[units]).strip() | |
elif n < 1000: | |
hundreds = n // 100 | |
rest = n % 100 | |
return (num2word(hundreds) + ' ' + numbers_map[100] + ' ' + num2word(rest)).strip() | |
elif n < 1_000_000: | |
thousands = n // 1_000 | |
rest = n % 1_000 | |
return (num2word(thousands) + ' ' + numbers_map[1_000] + ' ' + num2word(rest)).strip() | |
elif n < 1_000_000_000: | |
millions = n // 1_000_000 | |
rest = n % 1_000_000 | |
return (num2word(millions) + ' ' + numbers_map[1_000_000] + ' ' + num2word(rest)).strip() | |
elif n < 1_000_000_000_000: | |
billions = n // 1_000_000_000 | |
rest = n % 1_000_000_000 | |
return (num2word(billions) + ' ' + numbers_map[1_000_000_000] + ' ' + num2word(rest)).strip() | |
else: | |
return spell_numbers(str(n)) | |
def preprocess(text): | |
text = text.lower() # always treat lowercase | |
text = " " + text + " " | |
for symbol in mapping.keys(): | |
text = re.sub(symbol, mapping[symbol], text) | |
separators = "?!" # TODO: add proper symbols to tts | |
for symbol in separators: | |
text = text.replace(symbol, ".") | |
while True: | |
number_match = re.search("-?\d+(\.|,)?(\d+)?", text) | |
if number_match is None: | |
break | |
print(number_match.string, number_match.start(), number_match.end()) | |
number = number_match.string.strip() | |
prefix = "" | |
if number.startswith("-"): | |
prefix = "minus " | |
number = number.replace("-", "", 1) | |
elif number.startswith("+"): | |
prefix = "plüs " | |
number = number.replace("+", "", 1) | |
if "." in number: | |
number = number.split(".") | |
number = prefix + " noqta ".join((num2word(int(number[0])) if int(number[0]) != 0 else spell_numbers(number[0]), spell_numbers(number[1]))) | |
text = text.replace(number_match.string.strip(), number, 1) | |
continue | |
elif "," in number: | |
number = number.split(",") | |
number = prefix + " virgül ".join((num2word(int(number[0])) if int(number[0]) != 0 else spell_numbers(number[0]), spell_numbers(number[1]))) | |
text = text.replace(number_match.string.strip(), number, 1) | |
continue | |
if number.startswith("0"): | |
text = text.replace(number_match.string.strip(), prefix + spell_numbers(number), 1) | |
continue | |
text = text.replace(number_match.string.strip(), prefix + num2word(int(number)), 1) | |
return text.strip() | |