Spaces:
Running
Running
Update translit.py
Browse files- translit.py +29 -11
translit.py
CHANGED
|
@@ -94,12 +94,12 @@ arabic_to_english = {
|
|
| 94 |
"ُ": "u",
|
| 95 |
"ِ": "i",
|
| 96 |
"،": ",",
|
| 97 |
-
"ֹ": "
|
| 98 |
-
"ַ": "
|
| 99 |
-
"ִ": "
|
| 100 |
"ְ": "", # shva
|
| 101 |
-
"ֻ": "
|
| 102 |
-
'ֵ': "
|
| 103 |
"ّ": "SHADDA" # shadda
|
| 104 |
}
|
| 105 |
|
|
@@ -155,6 +155,7 @@ def reverse_holam_shadda_vav(input_string):
|
|
| 155 |
|
| 156 |
return result
|
| 157 |
|
|
|
|
| 158 |
def to_taatik(arabic):
|
| 159 |
taatik = []
|
| 160 |
for index, letter in enumerate(arabic):
|
|
@@ -174,7 +175,23 @@ def to_taatik(arabic):
|
|
| 174 |
return reordered
|
| 175 |
|
| 176 |
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
def to_translit(arabic):
|
| 180 |
translit = []
|
|
@@ -184,15 +201,16 @@ def to_translit(arabic):
|
|
| 184 |
else:
|
| 185 |
if arabic_to_english[letter] == "SHADDA":
|
| 186 |
if translit[-1][0] in vowels:
|
| 187 |
-
translit[-2][1] = translit[-2][1].upper()
|
|
|
|
| 188 |
else:
|
| 189 |
-
translit[-1][1] = translit[-1][1].upper()
|
| 190 |
-
|
|
|
|
| 191 |
else:
|
| 192 |
translit.append([letter, arabic_to_english[letter]])
|
| 193 |
|
| 194 |
-
return "".join([x[1] for x in translit])
|
| 195 |
-
|
| 196 |
|
| 197 |
# %%
|
| 198 |
|
|
|
|
| 94 |
"ُ": "u",
|
| 95 |
"ِ": "i",
|
| 96 |
"،": ",",
|
| 97 |
+
"ֹ": "ᵒ", # holam
|
| 98 |
+
"ַ": "ᵃ", # patah
|
| 99 |
+
"ִ": "ⁱ", # hiriq
|
| 100 |
"ְ": "", # shva
|
| 101 |
+
"ֻ": "ᵘ", # kubutz
|
| 102 |
+
'ֵ': "ᵉ" # tzere
|
| 103 |
"ّ": "SHADDA" # shadda
|
| 104 |
}
|
| 105 |
|
|
|
|
| 155 |
|
| 156 |
return result
|
| 157 |
|
| 158 |
+
|
| 159 |
def to_taatik(arabic):
|
| 160 |
taatik = []
|
| 161 |
for index, letter in enumerate(arabic):
|
|
|
|
| 175 |
return reordered
|
| 176 |
|
| 177 |
|
| 178 |
+
def postprocess_arabic_transliteration(text):
    """Normalise vowel spellings in a romanised string.

    The rules are applied in this order:

    1. A long vowel (ā ī ē ū ō) that is not followed by a word character
       (i.e. it ends a word or the string) is shortened to a i e u o.
    2. ``iy`` becomes ``ī`` unless followed by another ``y`` (``iyy`` is kept).
    3. ``uw`` becomes ``ū`` unless followed by another ``w`` (``uww`` is kept).
    4. ``ay`` becomes ``ē`` unless followed by another ``y`` (``ayy`` is kept).
    5. ``aw`` becomes ``ō`` unless followed by another ``w`` (``aww`` is kept).

    Shortening runs before the glide rules, so a long vowel produced by
    rules 2-5 stays long even at the end of a word (``"fiy"`` -> ``"fī"``).

    Args:
        text: The romanised string to clean up.

    Returns:
        The string with the substitutions above applied.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import re`` being present.
    import re

    # Built once per call rather than once per regex match.
    shorten = str.maketrans('āīēūō', 'aieuo')

    # Step 1: shorten word-final long vowels. ``(?!\w)`` is true both before
    # a non-word character and at the end of the string.
    text = re.sub(
        r'[āīēūō](?!\w)',
        lambda match: match.group(0).translate(shorten),
        text,
    )

    # Steps 2-5: collapse vowel + glide into a long vowel, leaving doubled
    # glides alone. The order matters and mirrors the rule list above.
    glide_rules = (
        (r'iy(?!y)', 'ī'),
        (r'uw(?!w)', 'ū'),
        (r'ay(?!y)', 'ē'),
        (r'aw(?!w)', 'ō'),
    )
    for pattern, long_vowel in glide_rules:
        text = re.sub(pattern, long_vowel, text)

    return text
|
| 195 |
|
| 196 |
def to_translit(arabic):
|
| 197 |
translit = []
|
|
|
|
| 201 |
else:
|
| 202 |
if arabic_to_english[letter] == "SHADDA":
|
| 203 |
if translit[-1][0] in vowels:
|
| 204 |
+
#translit[-2][1] = translit[-2][1].upper()
|
| 205 |
+
translit[-2][1] = translit[-2][1] + translit[-2][1]
|
| 206 |
else:
|
| 207 |
+
#translit[-1][1] = translit[-1][1].upper()
|
| 208 |
+
translit[-1][1] = translit[-1][1] + translit[-1][1]
|
| 209 |
+
|
| 210 |
else:
|
| 211 |
translit.append([letter, arabic_to_english[letter]])
|
| 212 |
|
| 213 |
+
return postprocess_arabic_transliteration("".join([x[1] for x in translit]))
|
|
|
|
| 214 |
|
| 215 |
# %%
|
| 216 |
|