Spaces:
Running
Running
File size: 3,720 Bytes
19c634e 4468072 a2689f4 19c634e 4468072 19c634e f5aefe9 a2689f4 19c634e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import re
# Text normalisation table used by preprocess(): every occurrence of a key is
# replaced by its value.  Most keys are a base letter followed by a Unicode
# combining mark (decomposed form); the value is the single precomposed letter.
mapping = {
"n\u0303": "\xf1",  # n + combining tilde -> ñ
"g\u0306": "\u011f",  # g + combining breve -> ğ
"i\u0307": "i",  # i + combining dot above (e.g. what "İ".lower() yields) -> plain i
"u\u0308": "\xfc",  # u + combining diaeresis -> ü
"o\u0308": "\xf6",  # o + combining diaeresis -> ö
# NOTE(review): U+04AB is CYRILLIC SMALL LETTER ES WITH DESCENDER, a look-alike
# of Latin ç (U+00E7).  Presumably this is the symbol the downstream TTS
# vocabulary expects -- confirm before "fixing" it.
"\xe7": "\u04ab",  # precomposed ç -> U+04AB
"c\u0327": "\u04ab",  # c + combining cedilla -> U+04AB
"s\u0327": "\u015f",  # s + combining cedilla -> ş
"a\u0302": "\xe2",  # a + combining circumflex -> â
"w": "v",  # letters rewritten to a phonetic equivalent
"x": "ks"
}
# Word for the digit zero.  Kept out of numbers_map: num2word() only consults
# numbers_map, while spell_numbers() merges both tables.
zero = {
0: 'sıfır',
}
# Number words keyed by their integer value: units 1-9, tens 10-90 and the
# scale words (100, 1000, 10**6, 10**9).
# NOTE(review): the spellings look like Crimean Tatar -- confirm the target language.
numbers_map = {
1: 'bir',
2: 'eki',
3: 'üç',
4: 'dört',
5: 'beş',
6: 'altı',
7: 'yedi',
8: 'sekiz',
9: 'doquz',
10: 'on',
20: 'yigirmi',
30: 'otuz',
40: 'qırq',
50: 'elli',
60: 'altmış',
70: 'yetmiş',
80: 'seksen',
90: 'doqsan',
100: 'yüz',
1000: 'biñ',
1_000_000: 'million',
1_000_000_000: 'milliard'
}
def spell_numbers(numbers: str) -> str:
    """Spell out the digits of *numbers* one by one.

    Every ASCII digit ``0``-``9`` is replaced by its word followed by a
    single space; any other character is left where it is.  Leading and
    trailing whitespace is stripped from the result.
    """
    lookup = {**numbers_map, **zero}
    # Keyed by the digit character so the string can be walked in one pass.
    digit_words = {str(digit): lookup[digit] for digit in range(10)}
    pieces = [
        digit_words[char] + ' ' if char in digit_words else char
        for char in numbers
    ]
    return ''.join(pieces).strip()
def num2word(n):
    """Return the spelled-out form of the integer *n*.

    * A value that is itself a key of ``numbers_map`` is returned directly.
    * Other values below 100 are "<tens> <units>"; zero yields the empty
      string, which lets the recursive calls drop an empty remainder.
    * Larger values are "<count> <scale word> <remainder>" for the scales
      100, 1000, 10**6 and 10**9, with count and remainder spelled
      recursively.
    * Values of 10**12 and above are spelled digit by digit through
      ``spell_numbers``.
    """
    if n in numbers_map:
        return numbers_map[n]

    if n < 100:
        units = n % 10
        if units == 0:
            # Multiples of ten were handled by the lookup above, so this is 0.
            return ''
        return (numbers_map[n - units] + ' ' + numbers_map[units]).strip()

    # (exclusive upper bound, scale) pairs, smallest first.
    scales = (
        (1_000, 100),
        (1_000_000, 1_000),
        (1_000_000_000, 1_000_000),
        (1_000_000_000_000, 1_000_000_000),
    )
    for upper_bound, scale in scales:
        if n < upper_bound:
            count, remainder = divmod(n, scale)
            spelled = num2word(count) + ' ' + numbers_map[scale] + ' ' + num2word(remainder)
            return spelled.strip()

    return spell_numbers(str(n))
# One number: an optional sign, a run of digits and, optionally, a "." or ","
# followed by at least one more digit.  The sign is only recognised when it is
# not glued to a preceding word character, so the hyphen in "5-10" or
# "2020-05-01" is not read as a minus.  A separator with no digits after it
# (the full stop in "5.") is deliberately left out of the match.
_NUMBER_PATTERN = re.compile(
    r"(?:(?<!\w)(?P<sign>[-+]))?(?P<whole>\d+)(?:(?P<sep>[.,])(?P<frac>\d+))?"
)


def _number_to_words(match):
    """Return the spoken form of one ``_NUMBER_PATTERN`` match."""
    sign = match.group("sign")
    whole = match.group("whole")
    separator = match.group("sep")

    prefix = {"-": "minus ", "+": "plüs "}.get(sign, "")

    if separator is None:
        # Plain integer: a leading zero means "read the digits one by one".
        if whole.startswith("0"):
            return prefix + spell_numbers(whole)
        return prefix + num2word(int(whole))

    # Decimal: the whole part is read as a number unless it equals zero, in
    # which case its digits are spelled; the fraction is always spelled
    # digit by digit.
    if int(whole) != 0:
        whole_words = num2word(int(whole))
    else:
        whole_words = spell_numbers(whole)
    joiner = " noqta " if separator == "." else " virgül "
    return prefix + whole_words + joiner + spell_numbers(match.group("frac"))


def preprocess(text):
    """Normalise *text* and replace every number in it with words.

    Steps, in order:

    1. Lower-case the text.
    2. Apply each ``mapping`` entry as a literal string replacement.
    3. Replace ``?`` and ``!`` with ``.``.
    4. Replace each number with its spoken form: a leading ``-`` / ``+``
       becomes "minus " / "plüs ", integers go through ``num2word`` (or
       ``spell_numbers`` when they start with ``0``), and decimals are read
       as "<whole> noqta <digits>" for ``.`` or "<whole> virgül <digits>"
       for ``,``.

    Returns the resulting text with surrounding whitespace stripped.
    """
    text = text.lower()  # always treat lowercase

    # Literal replacement: the keys are plain strings, not regex patterns.
    for symbol, replacement in mapping.items():
        text = text.replace(symbol, replacement)

    separators = "?!"  # TODO: add proper symbols to tts
    for symbol in separators:
        text = text.replace(symbol, ".")

    # Each number is substituted exactly where it was matched; the spoken
    # forms contain no digits, so a single pass is enough.
    text = _NUMBER_PATTERN.sub(_number_to_words, text)

    return text.strip()
|