cdactvm committed on
Commit
c298401
·
verified ·
1 Parent(s): 146f2c8

Update processDoubles.py

Browse files
Files changed (1) hide show
  1. processDoubles.py +25 -54
processDoubles.py CHANGED
@@ -1,54 +1,25 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- # In[2]:
5
-
6
-
7
- # # Function to process "double" followed by a number
8
- # def process_doubles(sentence):
9
- # tokens = sentence.split()
10
- # result = []
11
- # i = 0
12
- # while i < len(tokens):
13
- # if tokens[i] == "डबल":
14
- # if i + 1 < len(tokens):
15
- # result.append(tokens[i + 1])
16
- # result.append(tokens[i + 1])
17
- # i += 2
18
- # else:
19
- # result.append(tokens[i])
20
- # i += 1
21
- # else:
22
- # result.append(tokens[i])
23
- # i += 1
24
- # return ' '.join(result)
25
-
26
-
27
- # In[ ]:
28
-
29
-
30
- import re
31
-
32
def process_doubles(sentence, keyword="डबल"):
    """Expand every "<keyword> X" in *sentence* into "X X".

    The keyword may be followed by whitespace or be glued directly onto
    the next word ("डबलवन" is handled the same way as "डबल वन").

    Args:
        sentence: Whitespace-separated text to process.
        keyword: The token meaning "double". Defaults to the Hindi
            word "डबल"; pass another word to support other languages.

    Returns:
        The processed sentence with tokens joined by single spaces.
        A keyword that is the last token has nothing to duplicate and
        is kept unchanged.
    """
    # Detach the keyword from a word glued onto it so that the token
    # comparison below sees them as two separate tokens. The keyword is
    # escaped because callers may pass text containing regex
    # metacharacters.
    glued = re.compile('(' + re.escape(keyword) + r')(\S+)')
    tokens = glued.sub(r'\1 \2', sentence).split()

    result = []
    i = 0
    while i < len(tokens):
        if tokens[i] == keyword and i + 1 < len(tokens):
            # Drop the keyword and emit the following token twice.
            result.extend((tokens[i + 1], tokens[i + 1]))
            i += 2
        else:
            # Ordinary token, or a trailing keyword with nothing after it.
            result.append(tokens[i])
            i += 1

    return ' '.join(result)
54
-
 
1
+ import re
2
+
3
def process_doubles(sentence, keyword="டபுள்"):
    """Expand every "<keyword> X" in *sentence* into "X X".

    The keyword may be followed by whitespace or be glued directly onto
    the next word ("டபுள்5" is handled the same way as "டபுள் 5").

    Args:
        sentence: Whitespace-separated text to process.
        keyword: The token meaning "double". Defaults to the Tamil
            word "டபுள்"; pass another word (e.g. the Hindi "डबल") to
            process text in a different language.

    Returns:
        The processed sentence with tokens joined by single spaces.
        A keyword that is the last token has nothing to duplicate and
        is kept unchanged.
    """
    # Detach the keyword from a word glued onto it so that the token
    # comparison below sees them as two separate tokens. The keyword is
    # escaped because callers may pass text containing regex
    # metacharacters.
    glued = re.compile('(' + re.escape(keyword) + r')(\S+)')
    tokens = glued.sub(r'\1 \2', sentence).split()

    result = []
    i = 0
    while i < len(tokens):
        if tokens[i] == keyword and i + 1 < len(tokens):
            # Drop the keyword and emit the following token twice.
            result.extend((tokens[i + 1], tokens[i + 1]))
            i += 2
        else:
            # Ordinary token, or a trailing keyword with nothing after it.
            result.append(tokens[i])
            i += 1

    return ' '.join(result)
25
+