File size: 4,407 Bytes
19c634e
 
4468072
 
 
 
 
 
 
 
 
 
 
b1e6f9e
4468072
a2689f4
 
b1e6f9e
a2689f4
 
 
b1e6f9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2689f4
 
 
 
b1e6f9e
a2689f4
b1e6f9e
 
a2689f4
 
 
 
 
 
 
 
 
b1e6f9e
 
a2689f4
 
 
b1e6f9e
 
 
a2689f4
 
 
b1e6f9e
 
 
a2689f4
 
 
b1e6f9e
 
 
a2689f4
 
 
b1e6f9e
 
 
a2689f4
 
 
19c634e
 
 
 
 
4468072
 
19c634e
b1e6f9e
f5aefe9
 
 
a2689f4
b1e6f9e
 
 
 
 
 
 
 
 
 
 
 
 
a2689f4
 
 
b1e6f9e
 
a2689f4
 
 
 
 
 
 
 
 
 
 
b1e6f9e
 
 
 
 
 
 
 
 
a2689f4
 
 
b1e6f9e
 
 
 
 
 
 
 
 
a2689f4
 
 
b1e6f9e
a2689f4
b1e6f9e
 
a2689f4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import re

# Character-level normalisation table applied by preprocess() to lower-cased
# text. Keys are replaced by their values one entry at a time, in the order
# written here. Most keys are a base letter followed by a combining mark
# (decomposed form); the value is the single precomposed code point.
mapping = {
    "n\u0303": "\xf1",  # n + U+0303 combining tilde -> U+00F1 (n with tilde)
    "g\u0306": "\u011f",  # g + U+0306 combining breve -> U+011F (g with breve)
    "i\u0307": "i",  # i + U+0307 combining dot above -> plain i
    "u\u0308": "\xfc",  # u + U+0308 combining diaeresis -> U+00FC
    "o\u0308": "\xf6",  # o + U+0308 combining diaeresis -> U+00F6
    # NOTE(review): both c-cedilla spellings map to U+04AB (Cyrillic small es
    # with descender), not to Latin U+00E7 — presumably the symbol the
    # downstream model expects; confirm before changing.
    "\xe7": "\u04ab",  # precomposed U+00E7 (c with cedilla)
    "c\u0327": "\u04ab",  # c + U+0327 combining cedilla
    "s\u0327": "\u015f",  # s + U+0327 combining cedilla -> U+015F
    "a\u0302": "\xe2",  # a + U+0302 combining circumflex -> U+00E2
    "w": "v",  # plain letter substitutions
    "x": "ks",
}

# Word for the digit 0. spell_numbers() merges this with numbers_map; num2word()
# only consults numbers_map, where 0 is absent, so num2word(0) yields "" —
# presumably so that a zero remainder (e.g. the "00" of 200) adds no word.
zero = {
    0: "sıfır",
}

# Number words used by num2word() and spell_numbers(): the units 1-9, the tens
# 10-90, and the scale words for 100, 1000, 10**6 and 10**9. Zero is kept in
# the separate `zero` dict. (The words look like Crimean Tatar — confirm.)
numbers_map = {
    1: "bir",
    2: "eki",
    3: "üç",
    4: "dört",
    5: "beş",
    6: "altı",
    7: "yedi",
    8: "sekiz",
    9: "doquz",
    10: "on",
    20: "yigirmi",
    30: "otuz",
    40: "qırq",
    50: "elli",
    60: "altmış",
    70: "yetmiş",
    80: "seksen",
    90: "doqsan",
    100: "yüz",
    1000: "biñ",
    1_000_000: "million",
    1_000_000_000: "milliard",
}


def spell_numbers(numbers: str) -> str:
    """Spell out *numbers* one digit at a time.

    Every ASCII digit ``0``-``9`` is replaced by its word from ``numbers_map``
    (with ``zero`` supplying the word for ``0``) followed by a single space.
    All other characters pass through unchanged. Leading and trailing
    whitespace is stripped from the result, so ``"12"`` gives ``"bir eki"``.
    """
    words_by_value = {**numbers_map, **zero}
    # One translation pass is equivalent to replacing the digits one after
    # another, because none of the inserted words contains a digit.
    digit_table = str.maketrans(
        {str(digit): words_by_value[digit] + " " for digit in range(10)}
    )
    return numbers.translate(digit_table).strip()


def num2word(n):
    """Return the words for the integer *n*, built from ``numbers_map``.

    *n* is expected to be a non-negative int.

    * A value that is itself a key of ``numbers_map`` (1-9, the tens, 100,
      1000, 10**6, 10**9) returns that entry as is.
    * Other values below 100 return "<tens> <units>"; a value whose units
      digit is 0 and that is not in the table (in practice 0) returns "".
    * Values below 10**12 are split into a count of the largest fitting scale
      (100, 1000, 10**6 or 10**9) and a remainder, both converted recursively.
    * Anything from 10**12 upwards is spelled digit by digit via
      ``spell_numbers``.

    NOTE(review): exact scale values return the bare scale word (100 ->
    "yüz") while neighbouring values carry a leading count (101 ->
    "bir yüz bir"); confirm this is the intended reading.
    """
    try:
        return numbers_map[n]
    except KeyError:
        pass

    if n < 100:
        tens, units = divmod(n, 10)
        if units == 0:
            return ""
        return f"{numbers_map[tens * 10]} {numbers_map[units]}".strip()

    # (exclusive upper bound, scale unit) pairs, smallest first.
    scales = (
        (1_000, 100),
        (1_000_000, 1_000),
        (1_000_000_000, 1_000_000),
        (1_000_000_000_000, 1_000_000_000),
    )
    for upper_bound, unit in scales:
        if n < upper_bound:
            count, remainder = divmod(n, unit)
            # A zero remainder converts to "", so strip the dangling space.
            return (
                f"{num2word(count)} {numbers_map[unit]} {num2word(remainder)}"
            ).strip()

    return spell_numbers(str(n))


def preprocess(text):
    """Normalise *text* and replace the numbers in it with words.

    Steps, in order:

    1. Lower-case the text.
    2. Apply every ``mapping`` entry as a literal substring replacement.
    3. Replace ``?`` and ``!`` with ``.``.
    4. In every run of two or more "<digit>," pairs (e.g. ``1,2,3``), turn the
       commas into spaces so the digits are read as separate numbers.
    5. Replace each remaining number, leftmost first, with words:

       * a leading ``-`` / ``+`` becomes the prefix "minus " / "plüs ";
       * ``a.b`` / ``a,b`` becomes "<a> noqta <b>" / "<a> virgül <b>", where
         ``b`` is spelled digit by digit and ``a`` goes through ``num2word``
         unless its value is 0 (then it is spelled digit by digit);
       * an integer starting with ``0`` is spelled digit by digit;
       * any other integer goes through ``num2word``.

    Returns the resulting text with surrounding whitespace stripped.
    """
    # Raw strings: "\d" in a plain string literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    group_re = re.compile(r"(?:\d,){2,}")
    number_re = re.compile(
        r"(?P<sign>[-+])?(?P<whole>\d+)(?:(?P<sep>[.,])(?P<frac>\d+))?"
    )
    sign_words = {"-": "minus ", "+": "plüs "}
    separator_words = {".": "noqta", ",": "virgül"}

    text = text.lower()  # always treat lowercase
    text = " " + text + " "  # padding is stripped again before returning

    # The mapping keys are literal character sequences, not regex patterns.
    for source, target in mapping.items():
        text = text.replace(source, target)

    separators = "?!"  # TODO: add proper symbols to tts
    for symbol in separators:
        text = text.replace(symbol, ".")

    # Substitute on match spans rather than by substring value: replacing the
    # matched substring everywhere could cut a later, longer run in half
    # (e.g. the "1,2," inside "3,1,2,4,").
    text = group_re.sub(lambda m: m.group(0).replace(",", " "), text)

    # Re-search from the start after every replacement: digits that
    # spell_numbers() leaves untouched (non-ASCII digits) are picked up again
    # on a later pass.
    while True:
        match = number_re.search(text)
        if match is None:
            break

        sign, whole, sep, frac = match.group("sign", "whole", "sep", "frac")
        prefix = sign_words.get(sign, "")

        if sep is not None:
            if int(whole) != 0:
                head = num2word(int(whole))
            else:
                head = spell_numbers(whole)
            words = f"{head} {separator_words[sep]} {spell_numbers(frac)}"
        elif whole.startswith("0"):
            words = spell_numbers(whole)
        else:
            words = num2word(int(whole))

        text = text[: match.start()] + prefix + words + text[match.end() :]

    return text.strip()