Spaces:
Running
Running
Yurii Paniv
committed on
Commit
·
8d24ba9
1
Parent(s):
ec8e88d
Add converter to latin
Browse files
- .vscode/settings.json +3 -0
- converter.py +44 -1
- tests/test_converter.py +5 -3
.vscode/settings.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"python.formatting.provider": "black"
|
| 3 |
+
}
|
converter.py
CHANGED
|
@@ -3,4 +3,47 @@ def to_cyrillic(text):
|
|
| 3 |
|
| 4 |
|
| 5 |
def to_latin(text):
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
def to_latin(text):
    """Transliterate Cyrillic text to its Latin-script equivalent.

    The input is lowercased first, so the result is always lowercase.
    Characters that are not keys of the mapping (Latin letters, digits,
    punctuation, whitespace) pass through unchanged.

    This is a plain letter-by-letter substitution; it does not apply any
    context-dependent spelling rules.

    Args:
        text: The string to transliterate.

    Returns:
        The lowercased, transliterated string.
    """
    text = text.lower()
    cyrillic_mapping = {
        "а": "a",
        "б": "b",
        "в": "v",
        "г": "g",
        "гъ": "ğ",
        "д": "d",
        "е": "e",
        "ё": "ö",
        # Previously mapped to "", which silently deleted the letter.
        "ж": "j",
        "з": "z",
        "и": "i",
        "й": "y",
        "к": "k",
        "къ": "q",
        "л": "l",
        "м": "m",
        "н": "n",
        "нъ": "ñ",
        "о": "o",
        "п": "p",
        "р": "r",
        "с": "s",
        "т": "t",
        "у": "u",
        "ф": "f",
        "х": "h",
        # Previously mapped to "", which silently deleted the letter.
        "ц": "ts",
        "ч": "ç",
        "дж": "c",
        "ш": "ş",
        # Previously mapped to "", which silently deleted the letter.
        "щ": "şç",
        # The hard and soft signs have no Latin counterpart and are dropped
        # once the digraphs that contain "ъ" have been consumed.
        "ъ": "",
        "ы": "ı",
        "ь": "",
        "э": "e",
        "ю": "yu",
        "я": "ya",
    }

    # Replace the longest keys first so digraphs such as "гъ", "къ", "нъ"
    # and "дж" are consumed before their single-letter components.
    for key in sorted(cyrillic_mapping, key=len, reverse=True):
        text = text.replace(key, cyrillic_mapping[key])
    return text
|
tests/test_converter.py
CHANGED
|
@@ -11,7 +11,7 @@ def test_latin_converter():
|
|
| 11 |
cases = _read_test_cases()
|
| 12 |
print(cases)
|
| 13 |
for case in cases:
|
| 14 |
-
assert converter.to_latin(case[1]) == case[0]
|
| 15 |
|
| 16 |
|
| 17 |
def test_letter_coverage():
|
|
@@ -94,13 +94,15 @@ def test_letter_coverage():
|
|
| 94 |
cases = _read_test_cases()
|
| 95 |
missing_letters = []
|
| 96 |
latin_cases = " ".join([case[0] for case in cases]).lower()
|
| 97 |
-
for letter in latin_alphabet:
|
| 98 |
if letter not in latin_cases:
|
| 99 |
missing_letters.append(letter)
|
|
|
|
| 100 |
cyrillic_cases = " ".join([case[1] for case in cases]).lower()
|
| 101 |
-
for letter in cyrillic_alphabet:
|
| 102 |
if letter not in cyrillic_cases:
|
| 103 |
missing_letters.append(letter)
|
|
|
|
| 104 |
if len(missing_letters) > 0:
|
| 105 |
raise Exception(f"'{missing_letters}' not found in test dataset!")
|
| 106 |
|
|
|
|
| 11 |
cases = _read_test_cases()
|
| 12 |
print(cases)
|
| 13 |
for case in cases:
|
| 14 |
+
assert converter.to_latin(case[1]).lower() == case[0].lower()
|
| 15 |
|
| 16 |
|
| 17 |
def test_letter_coverage():
|
|
|
|
| 94 |
cases = _read_test_cases()
|
| 95 |
missing_letters = []
|
| 96 |
latin_cases = " ".join([case[0] for case in cases]).lower()
|
| 97 |
+
for letter in sorted(latin_alphabet, key=lambda x: len(x), reverse=True):
|
| 98 |
if letter not in latin_cases:
|
| 99 |
missing_letters.append(letter)
|
| 100 |
+
latin_cases = latin_cases.replace(letter, "")
|
| 101 |
cyrillic_cases = " ".join([case[1] for case in cases]).lower()
|
| 102 |
+
for letter in sorted(cyrillic_alphabet, key=lambda x: len(x), reverse=True):
|
| 103 |
if letter not in cyrillic_cases:
|
| 104 |
missing_letters.append(letter)
|
| 105 |
+
cyrillic_cases = cyrillic_cases.replace(letter, "")
|
| 106 |
if len(missing_letters) > 0:
|
| 107 |
raise Exception(f"'{missing_letters}' not found in test dataset!")
|
| 108 |
|