import re

# Vocabulary used by text_to_int.  Kept at module level so the lookup tables
# are built once instead of on every call.
_UNITS = (
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
    'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
    'sixteen', 'seventeen', 'eighteen', 'nineteen',
)
_TENS = (
    '', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy',
    'eighty', 'ninety',
)
# Explicit multipliers: deriving the exponent from the list position breaks
# as soon as an irregular scale such as "lac" (10^5) is inserted.
_SCALES = {
    'hundred': 10 ** 2,
    'thousand': 10 ** 3,
    'lac': 10 ** 5,
    'million': 10 ** 6,
    'billion': 10 ** 9,
    'trillion': 10 ** 12,
}
_ORDINAL_WORDS = {
    'first': 1, 'second': 2, 'third': 3, 'fourth': 4, 'fifth': 5,
    'sixth': 6, 'seventh': 7, 'eighth': 8, 'ninth': 9, 'tenth': 10,
    'eleventh': 11, 'twelfth': 12,
}
_ORDINAL_ENDINGS = (('ieth', 'y'), ('th', ''))
_ASCII_DIGITS = re.compile(r'[0-9]+')

# Token kinds returned by _classify.
_KIND_UNIT = 'unit'        # zero..nineteen and the irregular ordinals
_KIND_SCALE = 'scale'      # hundred, thousand, lac, ...
_KIND_LITERAL = 'literal'  # an integer written with digits, e.g. "1,000"
_KIND_OTHER = 'other'      # twenty..ninety and caller-defined words


def _build_default_numwords():
    """Return the default word -> (scale, increment) table."""
    table = {'and': (1, 0)}
    table.update((word, (1, value)) for value, word in enumerate(_UNITS))
    table.update(
        (word, (1, idx * 10)) for idx, word in enumerate(_TENS) if word
    )
    table.update((word, (factor, 0)) for word, factor in _SCALES.items())
    return table


_DEFAULT_NUMWORDS = _build_default_numwords()


def _classify(word, numwords, allow_suffix=True):
    """Return (scale, increment, kind) for a number token, else None.

    "and" is never classified here; the caller decides from context whether
    it belongs to a number.
    """
    if word == 'and':
        return None
    if word in _ORDINAL_WORDS:
        return 1, _ORDINAL_WORDS[word], _KIND_UNIT
    try:
        # Only true integers qualify; "3.5", "nan" or "1e5" stay plain text.
        return 1, int(word.replace(',', '')), _KIND_LITERAL
    except ValueError:
        pass
    if word in numwords:
        scale, increment = numwords[word]
        if word in _SCALES:
            kind = _KIND_SCALE
        elif word in _UNITS:
            kind = _KIND_UNIT
        else:
            kind = _KIND_OTHER
        return scale, increment, kind
    if allow_suffix:
        # Regular ordinals ("sixth", "twentieth", "10th"): accept the stem
        # only if it is itself a number, so "with"/"month" are left alone.
        for ending, replacement in _ORDINAL_ENDINGS:
            if word.endswith(ending):
                stem = word[:-len(ending)] + replacement
                token = _classify(stem, numwords, allow_suffix=False)
                if token is not None:
                    return token
    return None


def _decimal_digits(word):
    """Return the digit string *word* adds after a decimal point, or None."""
    if word in _UNITS:
        return str(_UNITS.index(word))
    literal = word.replace(',', '')
    if _ASCII_DIGITS.fullmatch(literal):
        return literal  # keep leading zeros: "point 05" -> ".05"
    return None


def is_number(s: str) -> bool:
    """Return True if *s* parses as a float once commas are removed.

    Anything ``float()`` accepts counts, including "3.5", "1e5", "nan"
    and "inf".
    """
    try:
        float(s.replace(',', ''))  # Handles numbers with commas
    except ValueError:
        return False
    return True


def text_to_int(textnum: str, numwords=None) -> str:
    """Replace spelled-out numbers in *textnum* with digits.

    The text is lower-cased, hyphens become spaces, and the result is the
    tokens re-joined with single spaces.  Cardinals ("two hundred and
    five"), ordinals ("fifth", "twentieth"), integer literals ("1,000") and
    spoken decimals ("three point one four") are converted; every other
    word is passed through unchanged.

    Args:
        textnum: Text to convert.
        numwords: Optional mapping of word -> (scale, increment).  When
            omitted the built-in English table is used.  An empty mapping
            is filled in place with the built-in table.

    Returns:
        The converted text, e.g. ``"i have 25 apples"``.
    """
    if numwords is None:
        numwords = _DEFAULT_NUMWORDS
    elif not numwords:
        numwords.update(_DEFAULT_NUMWORDS)

    words = textnum.lower().replace('-', ' ').split()  # Normalize input

    parts = []
    current = result = 0
    onnumber = False
    lastunit = False
    lastscale = False
    in_decimal = False
    decimal_digits = []

    def flush():
        """Emit the number being built (if any) and reset the state."""
        nonlocal current, result, onnumber, lastunit, lastscale, in_decimal
        if onnumber:
            text = str(result + current)
            if decimal_digits:
                text += '.' + ''.join(decimal_digits)
            parts.append(text)
        current = result = 0
        onnumber = lastunit = lastscale = in_decimal = False
        del decimal_digits[:]

    for idx, word in enumerate(words):
        following = words[idx + 1] if idx + 1 < len(words) else None

        if in_decimal:
            digits = _decimal_digits(word)
            if digits is not None:
                decimal_digits.append(digits)
                continue
            flush()  # the fractional part ended; handle word normally

        # "point" is a decimal marker only when digits actually follow;
        # otherwise it is an ordinary word ("good point").
        if (word == 'point' and following is not None
                and _decimal_digits(following) is not None):
            in_decimal = True
            onnumber = True  # bare "point five" reads as 0.5
            continue

        if word == 'and':
            upcoming = (
                _classify(following, numwords)
                if following is not None else None
            )
            # "one hundred and twenty": the connector is part of the number
            # only after a scale word and before another number word.
            if (onnumber and lastscale and upcoming is not None
                    and upcoming[2] != _KIND_LITERAL):
                continue
            flush()
            parts.append(word)
            continue

        token = _classify(word, numwords)
        if token is None:
            flush()
            parts.append(word)
            continue

        scale, increment, kind = token
        # Consecutive units ("one two three") and digit literals are
        # separate numbers rather than a sum.
        if onnumber and kind != _KIND_SCALE and (
                lastunit or kind == _KIND_LITERAL):
            flush()
        onnumber = True

        if scale > 1:
            current = max(1, current)  # bare "hundred" means 1 * 100
        current = current * scale + increment
        # Hundreds stay in `current` so that "two hundred thousand"
        # multiplies to 200000 instead of adding to 1200.
        if scale > 100:
            result += current
            current = 0

        lastscale = kind == _KIND_SCALE
        lastunit = kind in (_KIND_UNIT, _KIND_LITERAL)

    flush()
    return ' '.join(parts)