Spaces:
Running
Running
File size: 3,937 Bytes
c9574d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
"""
This file is adapted from https://github.com/hpbyte/Myanmar_Number_to_Words
"""
import re
# ---------------------------------------------------------------------------
# Every runtime string containing Myanmar text is spelled with \u escapes so
# that its exact code points (for example the order of U+103A and U+1037) are
# explicit; the readable form is given in the trailing comment.
# NOTE(review): these literals were reconstructed from a mis-decoded copy of
# this file -- verify them once against the upstream project.
# NOTE(review): several words end in an ASCII ":" instead of U+1038; that is
# kept exactly as found -- confirm it is what the consumer of this text expects.
# ---------------------------------------------------------------------------
_ZERO = "\u1040"  # ၀
_ONE = "\u1041"  # ၁
_SEVEN = "\u1047"  # ၇
_NINE = "\u1049"  # ၉

# Myanmar digit -> spoken word
mm_digit = {
    "\u1040": "\u101e\u102f\u100a",  # ၀ -> သုည
    "\u1041": "\u1010\u1005\u103a",  # ၁ -> တစ်
    "\u1042": "\u1014\u103e\u1005\u103a",  # ၂ -> နှစ်
    "\u1043": "\u101e\u102f\u1036:",  # ၃ -> သုံ:
    "\u1044": "\u101c\u1031:",  # ၄ -> လေ:
    "\u1045": "\u1004\u102b:",  # ၅ -> ငါ:
    "\u1046": "\u1001\u103c\u1031\u102c\u1000\u103a",  # ၆ -> ခြောက်
    "\u1047": "\u1001\u102f\u1014\u103e\u1005\u103a",  # ၇ -> ခုနှစ်
    "\u1048": "\u101b\u103e\u1005\u103a",  # ၈ -> ရှစ်
    "\u1049": "\u1000\u102d\u102f:",  # ၉ -> ကို:
}

# Place-value words.  The "*_LINKED" variants carry an extra U+1037 and are
# chosen by convert_digit() when lower-order digits follow.
_LAKH = "\u101e\u102d\u1014\u103a: "  # သိန်:  (100,000)
_TEN_THOUSAND = "\u101e\u1031\u102c\u1004\u103a: "  # သောင်:
_THOUSAND = "\u1011\u1031\u102c\u1004\u103a "  # ထောင်
_THOUSAND_LINKED = "\u1011\u1031\u102c\u1004\u103a\u1037 "  # ထောင် + U+1037
_HUNDRED = "\u101b\u102c "  # ရာ
_HUNDRED_LINKED = "\u101b\u102c\u1037 "  # ရာ + U+1037
_TEN = "\u1006\u101a\u103a "  # ဆယ်
_TEN_LINKED = "\u1006\u101a\u103a\u1037 "  # ဆယ် + U+1037

# Words used by mm_num2word()
_PHONE_SEVEN = "\u1001\u103d\u1014\u103a"  # ခွန် (7 inside a phone number)
_YEAR = " \u1001\u102f\u1014\u103e\u1005\u103a "  # ခုနှစ်
_MONTH = " \u101c\u1015\u102d\u102f\u1004\u103a: "  # လပိုင်:
_DAY = " \u101b\u1000\u103a"  # ရက်
_HOUR = " \u1014\u102c\u101b\u102e "  # နာရီ
_MINUTE = " \u1019\u102d\u1014\u1005\u103a"  # မိနစ်
_HALF = "\u1001\u103d\u1032"  # ခွဲ
_HALF_HOUR = "\u1043\u1040"  # ၃၀
_POINT = " \u1012\u103f\u1019 "  # ဒဿမ

# regular expressions
# Raw strings are used wherever a backslash occurs: "\/" and "\." inside a
# normal string literal are invalid escape sequences.  The resulting pattern
# text is unchanged.
_DIGIT_CLASS = "[\u1040-\u1049]"  # any Myanmar digit ၀-၉
rgxPh = "^(\u1040\u1041|\u1040\u1049)"
rgxDate = (
    r"{d}{{1,2}}-{d}{{1,2}}-{d}{{4}}|{d}{{1,2}}\/{d}{{1,2}}\/{d}{{4}}"
).format(d=_DIGIT_CLASS)
rgxTime = "{d}{{1,2}}:{d}{{1,2}}".format(d=_DIGIT_CLASS)
rgxDec = r"{d}*\.{d}*".format(d=_DIGIT_CLASS)
rgxAmt = "[,\u1040-\u1049]+"


def _is_nonzero_digit(ch):
    """Return True when *ch* is one of the Myanmar digits ၁-၉."""
    return _ONE <= ch <= _NINE


def convert_digit(num):
    """
    Convert a plain string of Myanmar digits into spoken words.

    The number is read place by place (lakh, ten-thousand, thousand,
    hundred, ten, unit); a place whose digit is zero contributes nothing.
    A non-empty number made only of zeros yields the word for zero, and an
    empty string yields an empty string.

    @type   num str
    @param  num Myanmar number (digits only, no separators)
    @rtype  str
    @return converted Myanmar spoken words
    @raise  KeyError if a character in a spoken position is not a Myanmar digit
    """
    nb_digits = len(num)

    # "0", "00", ... used to produce an empty string; speak it as zero.
    if num and not num.strip(_ZERO):
        return mm_digit[_ZERO]

    def is_set(pos):
        # True when the digit `pos` places from the right is not zero.
        return num[-pos] != _ZERO

    parts = []

    # Everything above the fifth digit is counted in lakhs.  A prefix made
    # only of (leading) zeros is skipped like any other zero place.
    if nb_digits > 5 and num[:-5].strip(_ZERO):
        high = num[:-5]
        if high[-2:] == _ZERO * 2:
            # Round hundreds of lakhs: the unit word comes first.
            parts.append(_LAKH + mm_num2word(high))
        else:
            parts.append(mm_num2word(high) + _LAKH)

    if nb_digits > 4 and is_set(5):
        parts.append(mm_digit[num[-5]] + _TEN_THOUSAND)

    if nb_digits > 3 and is_set(4):
        unit = _THOUSAND if num[-3:] == _ZERO * 3 else _THOUSAND_LINKED
        parts.append(mm_digit[num[-4]] + unit)

    if nb_digits > 2 and is_set(3):
        tens, ones = num[-2], num[-1]
        # Linked form only when exactly one of tens/ones is non-zero.
        linked = (tens == _ZERO and _is_nonzero_digit(ones)) or (
            _is_nonzero_digit(tens) and ones == _ZERO
        )
        parts.append(mm_digit[num[-3]] + (_HUNDRED_LINKED if linked else _HUNDRED))

    if nb_digits > 1 and is_set(2):
        # A tens digit of one is not spoken; only the "ten" word is emitted.
        tens_word = "" if num[-2] == _ONE else mm_digit[num[-2]]
        parts.append(tens_word + (_TEN if num[-1] == _ZERO else _TEN_LINKED))

    if nb_digits > 0 and is_set(1):
        parts.append(mm_digit[num[-1]])

    return "".join(parts)


def mm_num2word(num):
    """
    Detect type of number and convert accordingly.

    Formats are tried in this order: phone number (digits only, starting
    with ၀၁ or ၀၉), date (d-m-yyyy or d/m/yyyy), time (h:m), decimal,
    amount (digits with optional commas).

    @type   num str
    @param  num Myanmar number
    @rtype  str
    @return converted Myanmar spoken words
    @raise  ValueError if num matches none of the supported formats
    """
    # phone number: read digit by digit.  Requiring digits only keeps inputs
    # such as "၀၉:၃၀" or "၀၁-၀၂-၂၀၂၀" out of this branch, where the
    # separator used to raise KeyError.
    if re.match(rgxPh, num) and all(ch in mm_digit for ch in num):
        return " ".join(
            _PHONE_SEVEN if ch == _SEVEN else mm_digit[ch] for ch in num
        )

    # date: spoken as year, month, day
    if re.match(rgxDate, num):
        fields = re.split(r"-|/", num)
        return (
            convert_digit(fields[-1])
            + _YEAR
            + convert_digit(fields[1])
            + _MONTH
            + convert_digit(fields[0])
            + _DAY
        )

    # time: thirty minutes is spoken as "half"
    if re.match(rgxTime, num):
        fields = num.split(":")
        if fields[1] == _HALF_HOUR:
            minutes = _HALF
        else:
            minutes = convert_digit(fields[1]) + _MINUTE
        return convert_digit(fields[0]) + _HOUR + minutes

    # decimal: the fractional part is read digit by digit
    if re.match(rgxDec, num):
        fields = num.split(".")
        fraction = " ".join(mm_digit[d] for d in fields[1])
        return convert_digit(fields[0]) + _POINT + fraction

    # amount: thousands separators are dropped
    if re.match(rgxAmt, num):
        return convert_digit(num.replace(",", ""))

    # default (ValueError is a subclass of the Exception raised previously)
    raise ValueError("Cannot convert the provided number format!")
def extract_num(S):
    """
    Find every number-like token in a Myanmar sentence.

    The date, time, decimal and amount patterns are combined into a single
    alternation and tried in that order at each position, so a full date or
    time is returned as one item rather than as separate amounts.  Because
    the decimal pattern allows empty digit runs around the dot and the
    amount pattern accepts commas, a lone "." or "," is returned as well.

    @type   S str
    @param  S Myanmar sentence
    @rtype  list
    @return a list of Myanmar numbers
    """
    alternatives = (rgxDate, rgxTime, rgxDec, rgxAmt)
    number_pattern = re.compile("|".join(alternatives))
    return number_pattern.findall(S)
|