cdactvm committed on
Commit
46b307b
·
verified ·
1 Parent(s): 5f6cffd

Delete text2int.ipynb

Browse files
Files changed (1) hide show
  1. text2int.ipynb +0 -233
text2int.ipynb DELETED
@@ -1,233 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "id": "61185b34-45e0-4a78-a84b-2cedd08ad39a",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "# # Function to convert Hindi text to numerical representation\n",
11
- "# from isNumber import is_number\n",
12
- "\n",
13
- "# def text_to_int (textnum, numwords={}):\n",
14
- "# units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',\n",
15
- "# 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',\n",
16
- "# 'sixteen', 'seventeen', 'eighteen', 'nineteen']\n",
17
- "# tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']\n",
18
- "# scales = ['hundred', 'thousand', 'lac','million', 'billion', 'trillion']\n",
19
- "# ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}\n",
20
- "# ordinal_endings = [('ieth', 'y'), ('th', '')]\n",
21
- "\n",
22
- "# if not numwords:\n",
23
- "# numwords['and'] = (1, 0)\n",
24
- "# for idx, word in enumerate(units): numwords[word] = (1, idx)\n",
25
- "# for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)\n",
26
- "# for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)\n",
27
- "\n",
28
- "# textnum = textnum.replace('-', ' ')\n",
29
- "\n",
30
- "# current = result = 0\n",
31
- "# curstring = ''\n",
32
- "# onnumber = False\n",
33
- "# lastunit = False\n",
34
- "# lastscale = False\n",
35
- "\n",
36
- "# def is_numword(x):\n",
37
- "# if is_number(x):\n",
38
- "# return True\n",
39
- "# if word in numwords:\n",
40
- "# return True\n",
41
- "# return False\n",
42
- "\n",
43
- "# def from_numword(x):\n",
44
- "# if is_number(x):\n",
45
- "# scale = 0\n",
46
- "# increment = int(x.replace(',', ''))\n",
47
- "# return scale, increment\n",
48
- "# return numwords[x]\n",
49
- "\n",
50
- "# for word in textnum.split():\n",
51
- "# if word in ordinal_words:\n",
52
- "# scale, increment = (1, ordinal_words[word])\n",
53
- "# current = current * scale + increment\n",
54
- "# if scale > 100:\n",
55
- "# result += current\n",
56
- "# current = 0\n",
57
- "# onnumber = True\n",
58
- "# lastunit = False\n",
59
- "# lastscale = False\n",
60
- "# else:\n",
61
- "# for ending, replacement in ordinal_endings:\n",
62
- "# if word.endswith(ending):\n",
63
- "# word = \"%s%s\" % (word[:-len(ending)], replacement)\n",
64
- "\n",
65
- "# if (not is_numword(word)) or (word == 'and' and not lastscale):\n",
66
- "# if onnumber:\n",
67
- "# # Flush the current number we are building\n",
68
- "# curstring += repr(result + current) + \" \"\n",
69
- "# curstring += word + \" \"\n",
70
- "# result = current = 0\n",
71
- "# onnumber = False\n",
72
- "# lastunit = False\n",
73
- "# lastscale = False\n",
74
- "# else:\n",
75
- "# scale, increment = from_numword(word)\n",
76
- "# onnumber = True\n",
77
- "\n",
78
- "# if lastunit and (word not in scales): \n",
79
- "# # Assume this is part of a string of individual numbers to \n",
80
- "# # be flushed, such as a zipcode \"one two three four five\" \n",
81
- "# curstring += repr(result + current) \n",
82
- "# result = current = 0 \n",
83
- "\n",
84
- "# if scale > 1: \n",
85
- "# current = max(1, current) \n",
86
- "\n",
87
- "# current = current * scale + increment \n",
88
- "# if scale > 100: \n",
89
- "# result += current \n",
90
- "# current = 0 \n",
91
- "\n",
92
- "# lastscale = False \n",
93
- "# lastunit = False \n",
94
- "# if word in scales: \n",
95
- "# lastscale = True \n",
96
- "# elif word in units: \n",
97
- "# lastunit = True\n",
98
- "\n",
99
- "# if onnumber:\n",
100
- "# curstring += repr(result + current)\n",
101
- "\n",
102
- "# return curstring\n"
103
- ]
104
- },
105
- {
106
- "cell_type": "code",
107
- "execution_count": 3,
108
- "id": "a87b26d7-4a0e-4fdc-b03e-1537600faf65",
109
- "metadata": {},
110
- "outputs": [],
111
- "source": [
112
- "import nbimporter\n",
113
- "from isNumber import is_number # Remove or replace this if unnecessary\n",
114
- "\n",
115
- "def text_to_int(textnum, numwords={}):\n",
116
- " # Define units, tens, and scales including \"lac\"\n",
117
- " units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',\n",
118
- " 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',\n",
119
- " 'sixteen', 'seventeen', 'eighteen', 'nineteen']\n",
120
- " tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']\n",
121
- " scales = ['hundred', 'thousand', 'lac', 'million', 'billion', 'trillion'] # \"lac\" added\n",
122
- " ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5, 'eighth': 8, 'ninth': 9, 'twelfth': 12}\n",
123
- " ordinal_endings = [('ieth', 'y'), ('th', '')]\n",
124
- "\n",
125
- " if not numwords:\n",
126
- " numwords['and'] = (1, 0) # Handle \"one hundred and twenty\"\n",
127
- " \n",
128
- " # Add units, tens, and scales to numwords\n",
129
- " for idx, word in enumerate(units):\n",
130
- " numwords[word] = (1, idx)\n",
131
- " for idx, word in enumerate(tens):\n",
132
- " numwords[word] = (1, idx * 10)\n",
133
- " \n",
134
- " for idx, word in enumerate(scales):\n",
135
- " numwords[word] = (10 ** (5 if word == 'lac' else idx * 3 or 2), 0) # Handle \"lac\" as 10^5\n",
136
- "\n",
137
- " # Remove hyphens and normalize input\n",
138
- " textnum = textnum.replace('-', ' ')\n",
139
- "\n",
140
- " current = result = 0\n",
141
- " curstring = ''\n",
142
- " onnumber = False\n",
143
- " lastunit = False\n",
144
- " lastscale = False\n",
145
- "\n",
146
- " def is_numword(x):\n",
147
- " return is_number(x) or x in numwords\n",
148
- "\n",
149
- " def from_numword(x):\n",
150
- " if is_number(x):\n",
151
- " return 0, int(x.replace(',', ''))\n",
152
- " return numwords[x]\n",
153
- "\n",
154
- " for word in textnum.split():\n",
155
- " if word in ordinal_words:\n",
156
- " scale, increment = (1, ordinal_words[word])\n",
157
- " current = current * scale + increment\n",
158
- " if scale > 100:\n",
159
- " result += current\n",
160
- " current = 0\n",
161
- " onnumber = True\n",
162
- " lastunit = False\n",
163
- " lastscale = False\n",
164
- " else:\n",
165
- " for ending, replacement in ordinal_endings:\n",
166
- " if word.endswith(ending):\n",
167
- " word = f\"{word[:-len(ending)]}{replacement}\"\n",
168
- "\n",
169
- " if not is_numword(word) or (word == 'and' and not lastscale):\n",
170
- " if onnumber:\n",
171
- " curstring += repr(result + current) + \" \"\n",
172
- " curstring += word + \" \"\n",
173
- " result = current = 0\n",
174
- " onnumber = False\n",
175
- " lastunit = False\n",
176
- " lastscale = False\n",
177
- " else:\n",
178
- " scale, increment = from_numword(word)\n",
179
- " onnumber = True\n",
180
- "\n",
181
- " if lastunit and word not in scales:\n",
182
- " curstring += repr(result + current) + \" \"\n",
183
- " result = current = 0\n",
184
- "\n",
185
- " if scale > 1:\n",
186
- " current = max(1, current)\n",
187
- "\n",
188
- " current = current * scale + increment\n",
189
- "\n",
190
- " if scale >= 100:\n",
191
- " result += current\n",
192
- " current = 0\n",
193
- "\n",
194
- " lastscale = word in scales\n",
195
- " lastunit = word in units\n",
196
- "\n",
197
- " if onnumber:\n",
198
- " curstring += repr(result + current)\n",
199
- "\n",
200
- " return curstring.strip()"
201
- ]
202
- },
203
- {
204
- "cell_type": "code",
205
- "execution_count": null,
206
- "id": "83997c73-e1b4-4863-b1df-d6de6153e80d",
207
- "metadata": {},
208
- "outputs": [],
209
- "source": []
210
- }
211
- ],
212
- "metadata": {
213
- "kernelspec": {
214
- "display_name": "Python 3 (ipykernel)",
215
- "language": "python",
216
- "name": "python3"
217
- },
218
- "language_info": {
219
- "codemirror_mode": {
220
- "name": "ipython",
221
- "version": 3
222
- },
223
- "file_extension": ".py",
224
- "mimetype": "text/x-python",
225
- "name": "python",
226
- "nbconvert_exporter": "python",
227
- "pygments_lexer": "ipython3",
228
- "version": "3.11.7"
229
- }
230
- },
231
- "nbformat": 4,
232
- "nbformat_minor": 5
233
- }