Ibrahemqasim committed on
Commit
f012e57
·
verified ·
1 Parent(s): 9f045f5

Update fix langlinks.py

Browse files
Files changed (1) hide show
  1. fix langlinks.py +19 -3
fix langlinks.py CHANGED
@@ -13,15 +13,25 @@ json_url = "https://huggingface.co/Ibrahemqasim/enwiki_to_arwiki_categories/reso
13
  response = requests.get(json_url)
14
  data = response.json()
15
 
 
 
 
 
 
16
  # تحويل القاموس إلى قائمة من القواميس [{ "en": "value", "ar": "value" }, ...]
17
 
18
  data_lists = {
19
  "langlinks" : {},
20
  "filtered_data" : {},
 
21
  "cats_2000" : {},
22
  }
23
 
24
- for key, value in tqdm.tqdm(data.items()):
 
 
 
 
25
  # "Category:1. FC Köln non-playing staff"
26
  # remove " from start and end
27
  # ---
@@ -49,6 +59,13 @@ for key, value in tqdm.tqdm(data.items()):
49
  # ---
50
  # data_lists["cats_2000"].append({"en": key2, "ar": value2})
51
  data_lists["cats_2000"][key] = value
 
 
 
 
 
 
 
52
 
53
 
54
  for x, data_list in data_lists.items():
@@ -65,8 +82,7 @@ for x, data_list in data_lists.items():
65
  # repo_type="dataset", # نوع المستودع (نستخدم dataset للملفات)
66
  )
67
 
68
- print(f"__________________")
69
  print(f"file: {x} uploaded successfully!")
70
-
71
  print(f"{len(data)=}.")
72
  print(f"{len(data_list)} rows uploaded.")
 
13
  response = requests.get(json_url)
14
  data = response.json()
15
 
16
+ # تحميل الملف JSON من الرابط مباشرة
17
+ json_url2 = "https://huggingface.co/Ibrahemqasim/enwiki_to_arwiki_categories/resolve/main/countries.json"
18
+ response2 = requests.get(json_url2)
19
+ countries = response2.json()
20
+
21
  # تحويل القاموس إلى قائمة من القواميس [{ "en": "value", "ar": "value" }, ...]
22
 
23
  data_lists = {
24
  "langlinks" : {},
25
  "filtered_data" : {},
26
+ "cats_2000_contry" : {},
27
  "cats_2000" : {},
28
  }
29
 
30
+ for tab in tqdm.tqdm(data):
31
+ # ---
32
+ key = tab["en"]
33
+ value = tab["ar"]
34
+ # ---
35
  # "Category:1. FC Köln non-playing staff"
36
  # remove " from start and end
37
  # ---
 
59
  # ---
60
  # data_lists["cats_2000"].append({"en": key2, "ar": value2})
61
  data_lists["cats_2000"][key] = value
62
+ # ----
63
+ for en_c, ar_c in countries.items():
64
+ if en_c in key2 and ar_c in value2:
65
+ key3 = key2.replace(en_c, "country")
66
+ value3 = value2.replace(ar_c, "country")
67
+ # ---
68
+ data_lists["cats_2000_contry"][key3] = value3
69
 
70
 
71
  for x, data_list in data_lists.items():
 
82
  # repo_type="dataset", # نوع المستودع (نستخدم dataset للملفات)
83
  )
84
 
85
+ print(f"______________")
86
  print(f"file: {x} uploaded successfully!")
 
87
  print(f"{len(data)=}.")
88
  print(f"{len(data_list)} rows uploaded.")