File size: 3,720 Bytes
19c634e
 
4468072
 
 
 
 
 
 
 
 
 
 
 
 
a2689f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19c634e
 
 
 
 
4468072
 
19c634e
f5aefe9
 
 
 
a2689f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19c634e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import re

# Character-folding table: each key is replaced by its value in preprocess().
# Most keys are a base letter followed by a combining mark and are folded to
# the single precomposed code point.
mapping = {
    "n\u0303": "\xf1",    # n + combining tilde      -> U+00F1
    "g\u0306": "\u011f",  # g + combining breve      -> U+011F
    "i\u0307": "i",       # i + combining dot above  -> plain i
    "u\u0308": "\xfc",    # u + combining diaeresis  -> U+00FC
    "o\u0308": "\xf6",    # o + combining diaeresis  -> U+00F6
    "\xe7": "\u04ab",     # U+00E7 (c with cedilla)  -> U+04AB
    "c\u0327": "\u04ab",  # c + combining cedilla    -> U+04AB
    "s\u0327": "\u015f",  # s + combining cedilla    -> U+015F
    "a\u0302": "\xe2",    # a + combining circumflex -> U+00E2
    "w": "v",
    "x": "ks",
}

# Word for the digit 0, kept apart from numbers_map so that num2word() does
# not treat 0 as a directly spellable value.
zero = {0: 'sıfır'}

# Words for every value that num2word() can look up directly.
numbers_map = {
    # units
    1: 'bir', 2: 'eki', 3: 'üç',
    4: 'dört', 5: 'beş', 6: 'altı',
    7: 'yedi', 8: 'sekiz', 9: 'doquz',
    # tens
    10: 'on', 20: 'yigirmi', 30: 'otuz',
    40: 'qırq', 50: 'elli', 60: 'altmış',
    70: 'yetmiş', 80: 'seksen', 90: 'doqsan',
    # scale words
    100: 'yüz',
    1_000: 'biñ',
    1_000_000: 'million',
    1_000_000_000: 'milliard',
}


def spell_numbers(numbers: str) -> str:
    """Spell ``numbers`` out digit by digit.

    Every ASCII digit is replaced by its word (taken from ``numbers_map`` and
    ``zero``) followed by a space; all other characters are kept as they are.

    Args:
        numbers: String to convert, typically a run of digits.

    Returns:
        The converted string with leading and trailing whitespace removed.
    """
    digit_words = {**numbers_map, **zero}
    # One translation pass; the words themselves contain no digits, so this
    # matches replacing the digits one after another.
    table = {ord(str(digit)): digit_words[digit] + ' ' for digit in range(0, 10)}
    return numbers.translate(table).strip()


def num2word(n):
    """Return the words for the integer ``n`` using ``numbers_map``.

    Values present in ``numbers_map`` are returned directly. Other values are
    split into a leading count, a scale word (hundred, thousand, million,
    milliard) and a remainder, each spelled recursively. ``0`` yields an empty
    string, which lets a zero remainder vanish from the result. Values of
    10**12 and above are spelled digit by digit via ``spell_numbers``.

    Negative values are not supported.

    Args:
        n: Non-negative integer to spell.

    Returns:
        The spelled-out number, without surrounding whitespace.
    """
    known = numbers_map.get(n)
    if known is not None:
        return known

    if n < 100:
        tens, units = divmod(n, 10)
        if not units:
            return ''
        return (numbers_map[tens * 10] + ' ' + numbers_map[units]).strip()

    # (exclusive upper bound, scale value) for each magnitude, smallest first.
    magnitudes = (
        (1_000, 100),
        (1_000_000, 1_000),
        (1_000_000_000, 1_000_000),
        (1_000_000_000_000, 1_000_000_000),
    )
    for upper_bound, scale in magnitudes:
        if n < upper_bound:
            count, rest = divmod(n, scale)
            parts = (num2word(count), numbers_map[scale], num2word(rest))
            return ' '.join(parts).strip()

    return spell_numbers(str(n))


# Optional sign, integer digits, then optionally "." or "," followed by at
# least one fractional digit. Requiring the fractional digit keeps a
# sentence-final "5." from being read as a decimal number.
_NUMBER_PATTERN = re.compile(
    r"(?P<sign>[-+])?(?P<whole>\d+)(?:(?P<sep>[.,])(?P<frac>\d+))?"
)


def _number_to_words(match):
    """Return the spoken form of one ``_NUMBER_PATTERN`` match."""
    sign, whole, separator, fraction = match.group("sign", "whole", "sep", "frac")
    prefix = {"-": "minus ", "+": "plüs "}.get(sign, "")

    if fraction is not None:
        joiner = " noqta " if separator == "." else " virgül "
        # An all-zero integer part ("0", "00") is spelled digit by digit,
        # because num2word() returns an empty string for 0.
        if int(whole) != 0:
            whole_words = num2word(int(whole))
        else:
            whole_words = spell_numbers(whole)
        # Fractional digits are always read one by one.
        return prefix + whole_words + joiner + spell_numbers(fraction)

    # Integers with a leading zero (e.g. "007") are read digit by digit.
    if whole.startswith("0"):
        return prefix + spell_numbers(whole)

    return prefix + num2word(int(whole))


def preprocess(text):
    """Normalise ``text``: lowercase it, fold characters, spell out numbers.

    Steps, in order:

    1. Lowercase the text and apply every replacement in ``mapping``.
    2. Replace "?" and "!" with ".".
    3. Replace each number (optional leading "-" or "+", optional "." or ","
       decimal part) with words: "minus " / "plüs " for the sign,
       " noqta " for ".", " virgül " for ",".

    Only the matched number is replaced, so numbers embedded in a longer
    sentence are handled and the surrounding text is left untouched.

    Args:
        text: Raw input string.

    Returns:
        The normalised string with surrounding whitespace stripped.
    """
    text = text.lower()  # always treat lowercase

    # Keys of ``mapping`` are literal character sequences, so plain string
    # replacement is used rather than interpreting them as regex patterns.
    for symbol, replacement in mapping.items():
        text = text.replace(symbol, replacement)

    separators = "?!"  # TODO: add proper symbols to tts
    for symbol in separators:
        text = text.replace(symbol, ".")

    # The generated words contain no digits, so one substitution pass
    # converts every number.
    text = _NUMBER_PATTERN.sub(_number_to_words, text)

    return text.strip()