guymorlan committed on
Commit
c5a719e
·
verified ·
1 Parent(s): 25594a0

Update translit.py

Browse files
Files changed (1) hide show
  1. translit.py +7 -16
translit.py CHANGED
@@ -176,23 +176,14 @@ def to_taatik(arabic):
176
 
177
 
178
  def postprocess_arabic_transliteration(text):
179
- # New step: Convert 'aā' to 'ā', but keep 'aaā'
180
- text = re.sub(r'a(ā)(?!ā)', r'\1', text)
181
-
182
- # Step 1: Replace long vowels at the end of words
183
  text = re.sub(r'([āīēūō])(\W*$|\W+)', lambda m: m.group(1).translate(str.maketrans('āīēūō', 'aieuo')) + m.group(2), text)
184
-
185
- # Step 2: Convert 'iy' to 'ī', but keep 'iyy'
186
- text = re.sub(r'iy(?!y)', 'ī', text)
187
-
188
- # Step 3: Convert 'uw' to 'ū', but keep 'uww'
189
- text = re.sub(r'uw(?!w)', 'ū', text)
190
-
191
- # Step 4: Convert 'ay' to 'ē', but keep 'ayy'
192
- text = re.sub(r'ay(?!y)', 'ē', text)
193
-
194
- # Step 5: Convert 'aw' to 'ō', but keep 'aww'
195
- text = re.sub(r'aw(?!w)', 'ō', text)
196
 
197
  return text
198
 
 
176
 
177
 
178
  def postprocess_arabic_transliteration(text):
 
 
 
 
179
  text = re.sub(r'([āīēūō])(\W*$|\W+)', lambda m: m.group(1).translate(str.maketrans('āīēūō', 'aieuo')) + m.group(2), text)
180
+
181
+ text = re.sub(r'ᵃ(ā)(?!ā)', r'\1', text)
182
+ text = re.sub(r'ⁱy(?!y)', 'ī', text)
183
+ text = re.sub(r'ᵒw(?!w)', 'ō', text)
184
+ text = re.sub(r'ᵘw(?!w)', 'ū', text)
185
+ text = re.sub(r'ᵃy(?!y)', 'ē', text)
186
+ text = re.sub(r'ᵃw(?!w)', 'ō', text)
 
 
 
 
 
187
 
188
  return text
189