Update nat_datasets.py
Browse files- nat_datasets.py +19 -14
nat_datasets.py
CHANGED
@@ -106,14 +106,15 @@ for tab in tqdm.tqdm(data):
|
|
106 |
key1 = re.sub(rf'\b{re.escape(en_country)}\b', EN_NAT_PATTERN, f" {key} ", re.IGNORECASE)
|
107 |
key1 = key1.strip()
|
108 |
# ---
|
|
|
|
|
|
|
109 |
value1 = re.sub(rf'\b{re.escape(ar_country)}\b', NAT_PATTERN, f" {value} ", re.IGNORECASE)
|
110 |
value1 = value1.strip()
|
111 |
# ---
|
112 |
if EN_NAT_PATTERN in key1 and NAT_PATTERN in value1:
|
113 |
# ---
|
114 |
-
if key1 in data_lists["categories_with_NAT_pattern"]:
|
115 |
-
data_lists["categories_with_NAT_pattern"][key1]["count"] += 1
|
116 |
-
else:
|
117 |
data_lists["categories_with_NAT_pattern"][key1] = {"ar": value1, "count": 1}
|
118 |
# ---
|
119 |
# continue
|
@@ -124,18 +125,22 @@ for tab in tqdm.tqdm(data):
|
|
124 |
key_digits = re.search(reg_year, key, re.IGNORECASE)
|
125 |
value_digits = re.search(reg_year, value1, re.IGNORECASE)
|
126 |
# ----
|
127 |
-
if
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
key2
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
# ---
|
136 |
-
if key2 in data_lists["categories_with_YEAR_NAT_pattern"]:
|
137 |
-
data_lists["categories_with_YEAR_NAT_pattern"][key2]["count"] += 1
|
138 |
-
else:
|
139 |
data_lists["categories_with_YEAR_NAT_pattern"][key2] = {"ar": value2, "count": 1}
|
140 |
# ----
|
141 |
continue
|
|
|
106 |
key1 = re.sub(rf'\b{re.escape(en_country)}\b', EN_NAT_PATTERN, f" {key} ", re.IGNORECASE)
|
107 |
key1 = key1.strip()
|
108 |
# ---
|
109 |
+
if EN_NAT_PATTERN in key1 and key1 in data_lists["categories_with_NAT_pattern"]:
|
110 |
+
data_lists["categories_with_NAT_pattern"][key1]["count"] += 1
|
111 |
+
# ---
|
112 |
value1 = re.sub(rf'\b{re.escape(ar_country)}\b', NAT_PATTERN, f" {value} ", re.IGNORECASE)
|
113 |
value1 = value1.strip()
|
114 |
# ---
|
115 |
if EN_NAT_PATTERN in key1 and NAT_PATTERN in value1:
|
116 |
# ---
|
117 |
+
if key1 not in data_lists["categories_with_NAT_pattern"]:
|
|
|
|
|
118 |
data_lists["categories_with_NAT_pattern"][key1] = {"ar": value1, "count": 1}
|
119 |
# ---
|
120 |
# continue
|
|
|
125 |
key_digits = re.search(reg_year, key, re.IGNORECASE)
|
126 |
value_digits = re.search(reg_year, value1, re.IGNORECASE)
|
127 |
# ----
|
128 |
+
if not key_digits:
|
129 |
+
continue
|
130 |
+
# ----
|
131 |
+
key2 = key1.replace(key_digits.group(), YEAR_PATTERN)
|
132 |
+
# ---
|
133 |
+
if key2 in data_lists["categories_with_YEAR_NAT_pattern"]:
|
134 |
+
data_lists["categories_with_YEAR_NAT_pattern"][key2]["count"] += 1
|
135 |
+
# ---
|
136 |
+
if not value_digits:
|
137 |
+
continue
|
138 |
+
# ----
|
139 |
+
value2 = value1.replace(value_digits.group(), YEAR_PATTERN)
|
140 |
+
# ----
|
141 |
+
if key_digits.group() == value_digits.group():
|
142 |
# ---
|
143 |
+
if key2 not in data_lists["categories_with_YEAR_NAT_pattern"]:
|
|
|
|
|
144 |
data_lists["categories_with_YEAR_NAT_pattern"][key2] = {"ar": value2, "count": 1}
|
145 |
# ----
|
146 |
continue
|