Update fix langlinks.py
Browse files
- fix langlinks.py +19 -3
fix langlinks.py
CHANGED
@@ -13,15 +13,25 @@ json_url = "https://huggingface.co/Ibrahemqasim/enwiki_to_arwiki_categories/reso
|
|
13 |
response = requests.get(json_url)
|
14 |
data = response.json()
|
15 |
|
|
|
|
|
|
|
|
|
|
|
16 |
# تحويل القاموس إلى قائمة من القواميس [{ "en": "value", "ar": "value" }, ...]
|
17 |
|
18 |
data_lists = {
|
19 |
"langlinks" : {},
|
20 |
"filtered_data" : {},
|
|
|
21 |
"cats_2000" : {},
|
22 |
}
|
23 |
|
24 |
-
for key, value in tqdm.tqdm(data.items()):
|
|
|
|
|
|
|
|
|
25 |
# "Category:1. FC Köln non-playing staff"
|
26 |
# remove " from start and end
|
27 |
# ---
|
@@ -49,6 +59,13 @@ for key, value in tqdm.tqdm(data.items()):
|
|
49 |
# ---
|
50 |
# data_lists["cats_2000"].append({"en": key2, "ar": value2})
|
51 |
data_lists["cats_2000"][key] = value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
|
54 |
for x, data_list in data_lists.items():
|
@@ -65,8 +82,7 @@ for x, data_list in data_lists.items():
|
|
65 |
# repo_type="dataset", # نوع المستودع (نستخدم dataset للملفات)
|
66 |
)
|
67 |
|
68 |
-
print(f"
|
69 |
print(f"file: {x} uploaded successfully!")
|
70 |
-
|
71 |
print(f"{len(data)=}.")
|
72 |
print(f"{len(data_list)} rows uploaded.")
|
|
|
13 |
response = requests.get(json_url)
|
14 |
data = response.json()
|
15 |
|
16 |
+
# تحميل الملف JSON من الرابط مباشرة
|
17 |
+
json_url2 = "https://huggingface.co/Ibrahemqasim/enwiki_to_arwiki_categories/resolve/main/countries.json"
|
18 |
+
response2 = requests.get(json_url2)
|
19 |
+
countries = response2.json()
|
20 |
+
|
21 |
# تحويل القاموس إلى قائمة من القواميس [{ "en": "value", "ar": "value" }, ...]
|
22 |
|
23 |
data_lists = {
|
24 |
"langlinks" : {},
|
25 |
"filtered_data" : {},
|
26 |
+
"cats_2000_contry" : {},
|
27 |
"cats_2000" : {},
|
28 |
}
|
29 |
|
30 |
+
for tab in tqdm.tqdm(data):
|
31 |
+
# ---
|
32 |
+
key = tab["en"]
|
33 |
+
value = tab["ar"]
|
34 |
+
# ---
|
35 |
# "Category:1. FC Köln non-playing staff"
|
36 |
# remove " from start and end
|
37 |
# ---
|
|
|
59 |
# ---
|
60 |
# data_lists["cats_2000"].append({"en": key2, "ar": value2})
|
61 |
data_lists["cats_2000"][key] = value
|
62 |
+
# ----
|
63 |
+
for en_c, ar_c in countries.items():
|
64 |
+
if en_c in key2 and ar_c in value2:
|
65 |
+
key3 = key2.replace(en_c, "country")
|
66 |
+
value3 = value2.replace(ar_c, "country")
|
67 |
+
# ---
|
68 |
+
data_lists["cats_2000_contry"][key3] = value3
|
69 |
|
70 |
|
71 |
for x, data_list in data_lists.items():
|
|
|
82 |
# repo_type="dataset", # نوع المستودع (نستخدم dataset للملفات)
|
83 |
)
|
84 |
|
85 |
+
print(f"______________")
|
86 |
print(f"file: {x} uploaded successfully!")
|
|
|
87 |
print(f"{len(data)=}.")
|
88 |
print(f"{len(data_list)} rows uploaded.")
|