Yurii Paniv
Initial numbers
a2689f4
raw
history blame
3.72 kB
import re
# Substring replacements applied by ``preprocess`` after lower-casing.
# Keys are literal character sequences; values are what they are rewritten to.
mapping = {
    # Base letter + combining mark -> single precomposed letter.
    "n\u0303": "\xf1",      # n + combining tilde (U+0303) -> U+00F1
    "g\u0306": "\u011f",    # g + combining breve (U+0306) -> U+011F
    "i\u0307": "i",         # i + combining dot above (U+0307) -> plain i
    "u\u0308": "\xfc",      # u + combining diaeresis (U+0308) -> U+00FC
    "o\u0308": "\xf6",      # o + combining diaeresis (U+0308) -> U+00F6
    # Both spellings of c-cedilla are rewritten to U+04AB (a Cyrillic code
    # point). NOTE(review): presumably what the downstream model expects;
    # confirm.
    "\xe7": "\u04ab",
    "c\u0327": "\u04ab",    # c + combining cedilla (U+0327)
    "s\u0327": "\u015f",    # s + combining cedilla (U+0327) -> U+015F
    "a\u0302": "\xe2",      # a + combining circumflex (U+0302) -> U+00E2
    # Letters rewritten to other letters.
    "w": "v",
    "x": "ks",
}
# Word for the digit 0. Kept apart from ``numbers_map`` so that composing a
# number word never finds an entry for 0; only digit-by-digit spelling merges
# the two tables.
zero = {0: 'sıfır'}
# Number words used to compose spelled-out numbers.
# NOTE(review): the words look like Crimean Tatar in Latin script; confirm.
numbers_map = {
    # Units.
    1: 'bir',
    2: 'eki',
    3: 'üç',
    4: 'dört',
    5: 'beş',
    6: 'altı',
    7: 'yedi',
    8: 'sekiz',
    9: 'doquz',
    # Tens.
    10: 'on',
    20: 'yigirmi',
    30: 'otuz',
    40: 'qırq',
    50: 'elli',
    60: 'altmış',
    70: 'yetmiş',
    80: 'seksen',
    90: 'doqsan',
    # Scale words.
    100: 'yüz',
    1000: 'biñ',
    1_000_000: 'million',
    1_000_000_000: 'milliard',
}
def spell_numbers(numbers: str) -> str:
    """Spell every ASCII digit in *numbers* as its own word.

    Each of the characters ``0``-``9`` is replaced by the matching word from
    ``numbers_map`` / ``zero`` followed by a single space; all other
    characters are kept as they are. Leading and trailing whitespace is
    removed from the result.
    """
    digit_words = {**numbers_map, **zero}
    # Translation table keyed by code point: one pass over the string instead
    # of one ``replace`` call per digit.
    table = {ord(str(digit)): digit_words[digit] + ' ' for digit in range(10)}
    return numbers.translate(table).strip()
def num2word(n):
    """Return the spelled-out words for the integer *n*.

    Numbers are composed from the entries of ``numbers_map``:

    * a value that is itself a key of ``numbers_map`` is returned directly;
    * values below 100 are "<tens> <units>";
    * larger values below 10**12 are "<count> <scale word> <rest>", with
      *count* and *rest* spelled recursively;
    * values of 10**12 and above are spelled digit by digit with
      ``spell_numbers``.

    Negative values are spelled as ``"minus "`` followed by the words for the
    absolute value.

    NOTE: ``0`` yields an empty string. The recursion relies on this so that a
    zero remainder adds nothing (200 -> "eki yüz"). Use ``spell_numbers`` to
    get the word for a bare zero.
    """
    if n < 0:
        # Without this, negatives fell into the "< 100" branch and either
        # raised KeyError or silently returned ''.
        return 'minus ' + num2word(-n)
    if n in numbers_map:
        return numbers_map[n]
    if n == 0:
        return ''
    if n < 100:
        # Round tens are keys of numbers_map and were returned above, so
        # units is never 0 here.
        units = n % 10
        return numbers_map[n - units] + ' ' + numbers_map[units]
    if n >= 1_000_000_000_000:
        # No scale word beyond "milliard": fall back to spelling the digits.
        return spell_numbers(str(n))

    # Split off the largest scale word that fits and recurse on both parts.
    scale = next(s for s in (1_000_000_000, 1_000_000, 1_000, 100) if n >= s)
    count, rest = divmod(n, scale)
    return (num2word(count) + ' ' + numbers_map[scale] + ' ' + num2word(rest)).strip()
# An optionally signed run of digits, optionally followed by "." or "," and
# more digits. The separator only counts when digits follow it, so the full
# stop in "... 5." is not swallowed into the number.
_NUMBER_RE = re.compile(r"[-+]?\d+(?:[.,]\d+)?")


def _spell_number_match(match):
    """Return the spelled-out replacement for one ``_NUMBER_RE`` match."""
    number = match.group()

    prefix = ""
    if number.startswith("-"):
        prefix = "minus "
        number = number[1:]
    elif number.startswith("+"):
        prefix = "plüs "
        number = number[1:]

    for separator, separator_word in ((".", " noqta "), (",", " virgül ")):
        if separator in number:
            whole, fraction = number.split(separator)
            # An all-zero integer part ("0", "00") is spelled digit by digit
            # because num2word() returns '' for 0.
            if int(whole) != 0:
                whole_words = num2word(int(whole))
            else:
                whole_words = spell_numbers(whole)
            # Fractional digits are always read out one by one.
            return prefix + separator_word.join((whole_words, spell_numbers(fraction)))

    if number.startswith("0"):
        # Leading zeros carry information ("007"), so spell every digit.
        return prefix + spell_numbers(number)
    return prefix + num2word(int(number))


def preprocess(text):
    """Normalize *text* and spell out the numbers it contains.

    Steps, in order:

    1. lower-case the text;
    2. apply every replacement in ``mapping`` as a plain substring
       replacement;
    3. turn ``?`` and ``!`` into ``.``;
    4. replace every number (optional sign, optional ``.``/``,`` fraction)
       with words: ``-`` becomes "minus", ``+`` becomes "plüs", ``.`` becomes
       "noqta", ``,`` becomes "virgül"; numbers with a leading zero and all
       fractional parts are spelled digit by digit.

    Returns the processed text with leading/trailing whitespace removed.
    """
    text = text.lower()  # always treat lowercase

    # The keys are literal character sequences, not regular expressions, so
    # use str.replace rather than re.sub.
    for symbol, replacement in mapping.items():
        text = text.replace(symbol, replacement)

    separators = "?!"  # TODO: add proper symbols to tts
    for symbol in separators:
        text = text.replace(symbol, ".")

    # The replacement is computed from the matched number itself
    # (match.group()), not from match.string, which is the whole input.
    text = _NUMBER_RE.sub(_spell_number_match, text)
    return text.strip()