Ibrahemqasim committed on
Commit
4cf98f3
·
verified ·
1 Parent(s): f7d8dff

Update fix langlinks.py

Browse files
Files changed (1) hide show
  1. fix langlinks.py +20 -13
fix langlinks.py CHANGED
@@ -21,6 +21,13 @@ countries = response2.json()
21
 
22
  # تحويل القاموس إلى قائمة من القواميس [{ "en": "value", "ar": "value" }, ...]
23
 
 
 
 
 
 
 
 
24
  data_lists = {
25
  "langlinks" : {},
26
  "filtered_data" : {},
@@ -59,7 +66,7 @@ for tab in tqdm.tqdm(data):
59
  value2 = value.replace(value_digits.group(), "2000")
60
  # ---
61
  # data_lists["cats_2000"].append({"en": key2, "ar": value2})
62
- data_lists["cats_2000"][key] = value
63
  # ----
64
  for en_c, ar_c in countries.items():
65
  if en_c in key2 and ar_c in value2:
@@ -71,7 +78,17 @@ for tab in tqdm.tqdm(data):
71
 
72
  print(f"all data len: {len(data):,}.")
73
 
74
- for x, data_list in data_lists.items():
 
 
 
 
 
 
 
 
 
 
75
  data_list = [{"en": key, "ar": value} for key, value in data_list.items()]
76
 
77
  # حفظ القاموس المصحح في ملف JSON
@@ -89,17 +106,7 @@ for x, data_list in data_lists.items():
89
  # repo_type="dataset", # نوع المستودع (نستخدم dataset للملفات)
90
  )
91
 
92
- print("____________________________")
93
-
94
- datasets_list = {
95
- "langlinks" : "categories_en2ar",
96
- "filtered_data" : "categories_en2ar_with_years",
97
- "cats_2000_contry" : "categories_en2ar-cats_2000_contry",
98
- "cats_2000" : "categories_en2ar-cats_2000",
99
- }
100
-
101
- for x, data_list in data_lists.items():
102
- data_list = [{"en": key, "ar": value} for key, value in data_list.items()]
103
 
104
  set_name = datasets_list.get(x)
105
 
 
21
 
22
  # تحويل القاموس إلى قائمة من القواميس [{ "en": "value", "ar": "value" }, ...]
23
 
24
+ to_work = [
25
+ # "langlinks",
26
+ # "filtered_data",
27
+ # "cats_2000_contry",
28
+ "cats_2000",
29
+ ]
30
+
31
  data_lists = {
32
  "langlinks" : {},
33
  "filtered_data" : {},
 
66
  value2 = value.replace(value_digits.group(), "2000")
67
  # ---
68
  # data_lists["cats_2000"].append({"en": key2, "ar": value2})
69
+ data_lists["cats_2000"][key2] = value2
70
  # ----
71
  for en_c, ar_c in countries.items():
72
  if en_c in key2 and ar_c in value2:
 
78
 
79
  print(f"all data len: {len(data):,}.")
80
 
81
+ datasets_list = {
82
+ "langlinks" : "categories_en2ar",
83
+ "filtered_data" : "categories_en2ar_with_years",
84
+ "cats_2000_contry" : "categories_en2ar-cats_2000_contry",
85
+ "cats_2000" : "categories_en2ar-cats_2000",
86
+ }
87
+
88
+ # for x, data_list in data_lists.items():
89
+ for x in to_work:
90
+ data_list = data_lists.get(x)
91
+ # ---
92
  data_list = [{"en": key, "ar": value} for key, value in data_list.items()]
93
 
94
  # حفظ القاموس المصحح في ملف JSON
 
106
  # repo_type="dataset", # نوع المستودع (نستخدم dataset للملفات)
107
  )
108
 
109
+ print("____________________________")
 
 
 
 
 
 
 
 
 
 
110
 
111
  set_name = datasets_list.get(x)
112