Spaces:
Running
Running
File size: 4,407 Bytes
19c634e 4468072 b1e6f9e 4468072 a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 19c634e 4468072 19c634e b1e6f9e f5aefe9 a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 b1e6f9e a2689f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
import re
# Character normalisation table used by preprocess() on lower-cased input:
# every occurrence of a key is replaced by its value, in the order listed.
mapping = {
    # Base letter + combining mark -> single precomposed code point.
    "n\u0303": "\xf1",  # n + combining tilde -> U+00F1
    "g\u0306": "\u011f",  # g + combining breve -> U+011F
    "i\u0307": "i",  # i + combining dot above (str.lower() of U+0130) -> plain i
    "u\u0308": "\xfc",  # u + combining diaeresis -> U+00FC
    "o\u0308": "\xf6",  # o + combining diaeresis -> U+00F6
    # Both spellings of c-cedilla map to U+04AB, which is a Cyrillic letter.
    # NOTE(review): presumably the symbol the TTS model expects -- confirm.
    "\xe7": "\u04ab",  # precomposed U+00E7
    "c\u0327": "\u04ab",  # c + combining cedilla
    "s\u0327": "\u015f",  # s + combining cedilla -> U+015F
    "a\u0302": "\xe2",  # a + combining circumflex -> U+00E2
    # Plain letters rewritten to other letters.
    "w": "v",
    "x": "ks",
}
# Word for the digit 0. Kept apart from numbers_map, whose keys start at 1;
# spell_numbers() merges the two tables when spelling digit by digit.
zero = {0: "sıfır"}
# Number words looked up by num2word() and spell_numbers().
# There is deliberately no entry for 0 here; that word lives in `zero`.
numbers_map = {
    # Units
    1: "bir",
    2: "eki",
    3: "üç",
    4: "dört",
    5: "beş",
    6: "altı",
    7: "yedi",
    8: "sekiz",
    9: "doquz",
    # Tens
    10: "on",
    20: "yigirmi",
    30: "otuz",
    40: "qırq",
    50: "elli",
    60: "altmış",
    70: "yetmiş",
    80: "seksen",
    90: "doqsan",
    # Scale words
    100: "yüz",
    1000: "biñ",
    1_000_000: "million",
    1_000_000_000: "milliard",
}
def spell_numbers(numbers: str) -> str:
    """Spell out a string one digit at a time.

    Each of the characters "0" to "9" in ``numbers`` is replaced by its word
    followed by a single space; every other character is left where it is.

    Args:
        numbers: Text containing the digits to spell, e.g. ``"105"``.

    Returns:
        The text with digits replaced by words, with leading and trailing
        whitespace stripped (``"105"`` becomes ``"bir sıfır beş"``).
    """
    # numbers_map has no entry for 0, so the zero table is layered on top.
    digit_words = {**numbers_map, **zero}
    spelled = numbers
    for digit, word in ((str(d), digit_words[d]) for d in range(10)):
        spelled = spelled.replace(digit, f"{word} ")
    return spelled.strip()
def num2word(n):
    """Return the spelled-out words for the number ``n``.

    Values that are keys of ``numbers_map`` are returned directly. Other
    values below 100 are built as "<tens> <units>"; larger values are split
    into "<count> <scale word> <remainder>" and each part is converted
    recursively. Values of 10**12 and above are spelled digit by digit with
    ``spell_numbers``.

    ``n == 0`` yields an empty string. The recursion relies on this so that
    an exact multiple of a scale (e.g. 200) gets no trailing word.

    Negative values are not handled here; preprocess() strips the sign before
    calling this function.

    NOTE(review): a count of one is spelled out, so 101 becomes
    "bir yüz bir" although 100 alone is "yüz" -- confirm this is intended.
    """
    if n in numbers_map:
        return numbers_map[n]

    if n < 100:
        tens_count, units = divmod(n, 10)
        tens = tens_count * 10
        if units == 0:
            # Only reachable for values without a table entry (e.g. 0).
            return ""
        return f"{numbers_map[tens]} {numbers_map[units]}".strip()

    # (exclusive upper bound, scale) pairs, checked from smallest to largest.
    scales = (
        (1000, 100),
        (1_000_000, 1_000),
        (1_000_000_000, 1_000_000),
        (1_000_000_000_000, 1_000_000_000),
    )
    for upper_bound, scale in scales:
        if n < upper_bound:
            count, remainder = divmod(n, scale)
            parts = (num2word(count), numbers_map[scale], num2word(remainder))
            # strip() drops the trailing space left by an empty remainder.
            return " ".join(parts).strip()

    # Too large for the scale table: fall back to reading out the digits.
    return spell_numbers(str(n))
# A run of at least two "digit followed by comma" pairs, e.g. "1,2," in "1,2,3".
_DIGIT_RUN_RE = re.compile(r"((\d,)+){2,}")
# Optional sign, digits, then optionally one "." or "," and more digits.
_NUMBER_RE = re.compile(r"(\-|\+)?(\d)+((\.|,)?\d+)?")

# Characters that are normalised to a full stop.
_SEPARATORS = "?!"  # TODO: add proper symbols to tts


def _number_to_words(number: str) -> str:
    """Convert one token matched by ``_NUMBER_RE`` into words."""
    prefix = ""
    if number.startswith("-"):
        prefix = "minus "
        number = number[1:]
    elif number.startswith("+"):
        prefix = "plüs "
        number = number[1:]

    # The pattern allows at most one separator, so partition() splits the
    # token into exactly an integer part and a fractional part.
    for separator, joiner in ((".", " noqta "), (",", " virgül ")):
        if separator in number:
            whole, _, fraction = number.partition(separator)
            if int(whole) != 0:
                whole_words = num2word(int(whole))
            else:
                # num2word(0) is empty, so a zero integer part is spelled
                # digit by digit instead.
                whole_words = spell_numbers(whole)
            # Fractional digits are always read out one by one.
            return prefix + joiner.join((whole_words, spell_numbers(fraction)))

    if number.startswith("0"):
        # Leading zeros would be lost by int(); read the digits out instead.
        return prefix + spell_numbers(number)
    return prefix + num2word(int(number))


def preprocess(text: str) -> str:
    """Normalise raw text into the spelled-out form fed to the TTS model.

    Steps, in order:

    1. Lower-case the text.
    2. Replace every key of ``mapping`` with its value (literal replacement).
    3. Turn "?" and "!" into ".".
    4. Repeatedly rewrite numbers until none are left:
       - in runs such as "1,2,3" the commas become spaces, so each digit is
         later read on its own;
       - a leading "-" / "+" becomes "minus " / "plüs ";
       - "." and "," inside a number become " noqta " / " virgül ", with the
         fractional digits spelled one by one;
       - numbers starting with "0" are spelled digit by digit;
       - everything else goes through ``num2word``.

    Args:
        text: The raw input text.

    Returns:
        The normalised text with leading and trailing whitespace stripped.

    NOTE(review): number words are inserted without surrounding spaces, so
    "5kg" becomes "beşkg" and "10-20" becomes "onminus yigirmi" -- confirm
    whether that is acceptable for the model.
    """
    text = text.lower()  # always treat lowercase

    # The mapping keys are plain text, not patterns, so replace them literally.
    for source, target in mapping.items():
        text = text.replace(source, target)

    for symbol in _SEPARATORS:
        text = text.replace(symbol, ".")

    while True:
        # Digit runs are handled first; otherwise "1,2,3" would be read as
        # the decimal "1,2" followed by a stray ",3".
        run_match = _DIGIT_RUN_RE.search(text)
        if run_match is not None:
            run = run_match.group(0)
            text = text.replace(run, run.replace(",", " "))
            continue

        number_match = _NUMBER_RE.search(text)
        if number_match is None:
            break

        # search() returns the leftmost match, so the first occurrence of the
        # token in the text is the match itself.
        token = number_match.group(0)
        text = text.replace(token, _number_to_words(token), 1)

    return text.strip()
|