"""Text normalisation for a TTS front end: character mapping and number spelling.

The number words below look like Crimean Tatar (Latin script) -- NOTE(review):
confirm the target language with the model owner.
"""

import re

# Replacements applied to lowercased input, in insertion order.  Most keys are
# a base letter followed by a combining mark and are folded into the single
# precomposed code point.  "i\u0307" is what str.lower() yields for "İ".
# NOTE(review): "\xe7" (ç) and "c\u0327" both map to "\u04ab", a Cyrillic
# letter -- presumably what the downstream symbol set expects; confirm.
mapping = {
    "n\u0303": "\xf1",
    "g\u0306": "\u011f",
    "i\u0307": "i",
    "u\u0308": "\xfc",
    "o\u0308": "\xf6",
    "\xe7": "\u04ab",
    "c\u0327": "\u04ab",
    "s\u0327": "\u015f",
    "a\u0302": "\xe2",
    "w": "v",
    "x": "ks",
}

# Kept apart from numbers_map so that zero is only spoken when spelling digit
# by digit or when the whole value is zero, never as a "rest" inside a number.
zero = {
    0: "sıfır",
}

numbers_map = {
    1: "bir",
    2: "eki",
    3: "üç",
    4: "dört",
    5: "beş",
    6: "altı",
    7: "yedi",
    8: "sekiz",
    9: "doquz",
    10: "on",
    20: "yigirmi",
    30: "otuz",
    40: "qırq",
    50: "elli",
    60: "altmış",
    70: "yetmiş",
    80: "seksen",
    90: "doqsan",
    100: "yüz",
    1000: "biñ",
    1_000_000: "million",
    1_000_000_000: "milliard",
}

# Translation table: each ASCII digit -> its word followed by one space.
_DIGIT_TABLE = str.maketrans(
    {str(digit): {**numbers_map, **zero}[digit] + " " for digit in range(10)}
)

# Scale words handled by num2word, largest first.
_SCALES = (1_000_000_000, 1_000_000, 1_000, 100)

# Values at or above this are spelled digit by digit instead of being named.
_SPELL_OUT_LIMIT = 1_000_000_000_000

# Two or more consecutive "<digit>," pairs, e.g. the "1,2," in "1,2,3".
_DIGIT_LIST_RE = re.compile(r"(\d,){2,}")

# Optionally signed integer with an optional "." or "," fractional part.
_NUMBER_RE = re.compile(r"[-+]?\d+(?:[.,]?\d+)?")


def spell_numbers(numbers: str) -> str:
    """Spell every ASCII digit in *numbers* as a separate word.

    Characters that are not ASCII digits are left in place.  The result is
    stripped of leading and trailing whitespace.
    """
    return numbers.translate(_DIGIT_TABLE).strip()


def _spell_below_limit(n):
    """Return the words for ``0 <= n < 10**12``; zero yields an empty string."""
    if n == 0:
        # Lets callers append a zero "rest" without emitting a word for it.
        return ""
    if n in numbers_map:
        return numbers_map[n]
    if n < 100:
        # Round tens and single digits are in numbers_map, so both parts
        # are non-zero here.
        return numbers_map[n // 10 * 10] + " " + numbers_map[n % 10]
    for scale in _SCALES:
        if n >= scale:
            count, rest = divmod(n, scale)
            # NOTE(review): a count of one is spoken explicitly ("bir yüz bir"
            # for 101) although 100 itself is just "yüz"; kept as the original
            # behaved -- confirm this is the intended wording.
            words = (
                _spell_below_limit(count)
                + " "
                + numbers_map[scale]
                + " "
                + _spell_below_limit(rest)
            )
            return words.strip()
    raise ValueError(n)  # unreachable for n >= 1


def num2word(n):
    """Return the spoken form of the integer *n*.

    Zero gives the zero word, negative values are prefixed with ``"minus "``,
    and magnitudes of 10**12 or more are spelled digit by digit.
    """
    if n < 0:
        return "minus " + num2word(-n)
    if n == 0:
        return zero[0]
    if n >= _SPELL_OUT_LIMIT:
        return spell_numbers(str(n))
    return _spell_below_limit(n)


def _int_to_words(digits):
    """Spell an unsigned digit string as a number, without ever overflowing."""
    significant = digits.lstrip("0")
    if len(significant) > 12 and significant.isascii():
        # Same output num2word gives for values >= 10**12, but skips int():
        # Python 3.11+ refuses to convert strings longer than 4300 digits.
        return spell_numbers(significant)
    return num2word(int(digits))


def _number_to_words(number):
    """Convert one matched number token (sign, digits, optional fraction)."""
    prefix = ""
    if number.startswith("-"):
        prefix, number = "minus ", number[1:]
    elif number.startswith("+"):
        prefix, number = "plüs ", number[1:]

    for separator, joiner in ((".", " noqta "), (",", " virgül ")):
        if separator in number:
            whole, fraction = number.split(separator)
            # An all-zero integer part is spelled digit by digit ("00" ->
            # two zero words); the fraction is always spelled digit by digit.
            whole_words = (
                _int_to_words(whole) if whole.strip("0") else spell_numbers(whole)
            )
            return prefix + joiner.join((whole_words, spell_numbers(fraction)))

    if number.startswith("0"):
        # Leading zeros carry meaning (codes, phone numbers): read the digits.
        return prefix + spell_numbers(number)
    return prefix + _int_to_words(number)


def preprocess(text):
    """Lowercase *text*, normalise characters and replace numbers by words.

    Steps, in order: lowercase; apply ``mapping``; turn ``?`` and ``!`` into
    ``.``; split comma-separated single digits ("1,2,3") into separate
    digits; spell every remaining number.  Returns the stripped result.
    """
    text = text.lower()  # always treat lowercase
    text = " " + text + " "

    # Plain substring replacement: the keys are literal text, not patterns.
    for symbol, replacement in mapping.items():
        text = text.replace(symbol, replacement)

    separators = "?!"
    # TODO: add proper symbols to tts
    for symbol in separators:
        text = text.replace(symbol, ".")

    while True:
        groups_match = _DIGIT_LIST_RE.search(text)
        if groups_match is not None:
            group = groups_match.group()
            text = text.replace(group, group.replace(",", " "))
            continue

        number_match = _NUMBER_RE.search(text)
        if number_match is None:
            break
        # search() returns the leftmost match, so splicing at its span is
        # the same as replacing the first occurrence of the matched text.
        text = (
            text[: number_match.start()]
            + _number_to_words(number_match.group())
            + text[number_match.end():]
        )

    return text.strip()