Ibrahemqasim committed on
Commit
ada721f
·
verified ·
1 Parent(s): 07a48c1

Update nat_datasets.py

Browse files
Files changed (1) hide show
  1. nat_datasets.py +19 -14
nat_datasets.py CHANGED
@@ -106,14 +106,15 @@ for tab in tqdm.tqdm(data):
106
  key1 = re.sub(rf'\b{re.escape(en_country)}\b', EN_NAT_PATTERN, f" {key} ", re.IGNORECASE)
107
  key1 = key1.strip()
108
  # ---
 
 
 
109
  value1 = re.sub(rf'\b{re.escape(ar_country)}\b', NAT_PATTERN, f" {value} ", re.IGNORECASE)
110
  value1 = value1.strip()
111
  # ---
112
  if EN_NAT_PATTERN in key1 and NAT_PATTERN in value1:
113
  # ---
114
- if key1 in data_lists["categories_with_NAT_pattern"]:
115
- data_lists["categories_with_NAT_pattern"][key1]["count"] += 1
116
- else:
117
  data_lists["categories_with_NAT_pattern"][key1] = {"ar": value1, "count": 1}
118
  # ---
119
  # continue
@@ -124,18 +125,22 @@ for tab in tqdm.tqdm(data):
124
  key_digits = re.search(reg_year, key, re.IGNORECASE)
125
  value_digits = re.search(reg_year, value1, re.IGNORECASE)
126
  # ----
127
- if key_digits and value_digits and key_digits.group() == value_digits.group():
128
- # if key1 in data_lists["categories_with_years"]:
129
- # data_lists["categories_with_years"][key1]["count"] += 1
130
- # else:
131
- # data_lists["categories_with_years"][key1] = {"ar": value1, "count": 1}
132
- # ---
133
- key2 = key1.replace(key_digits.group(), YEAR_PATTERN)
134
- value2 = value1.replace(value_digits.group(), YEAR_PATTERN)
 
 
 
 
 
 
135
  # ---
136
- if key2 in data_lists["categories_with_YEAR_NAT_pattern"]:
137
- data_lists["categories_with_YEAR_NAT_pattern"][key2]["count"] += 1
138
- else:
139
  data_lists["categories_with_YEAR_NAT_pattern"][key2] = {"ar": value2, "count": 1}
140
  # ----
141
  continue
 
106
  key1 = re.sub(rf'\b{re.escape(en_country)}\b', EN_NAT_PATTERN, f" {key} ", re.IGNORECASE)
107
  key1 = key1.strip()
108
  # ---
109
+ if EN_NAT_PATTERN in key1 and key1 in data_lists["categories_with_NAT_pattern"]:
110
+ data_lists["categories_with_NAT_pattern"][key1]["count"] += 1
111
+ # ---
112
  value1 = re.sub(rf'\b{re.escape(ar_country)}\b', NAT_PATTERN, f" {value} ", re.IGNORECASE)
113
  value1 = value1.strip()
114
  # ---
115
  if EN_NAT_PATTERN in key1 and NAT_PATTERN in value1:
116
  # ---
117
+ if key1 not in data_lists["categories_with_NAT_pattern"]:
 
 
118
  data_lists["categories_with_NAT_pattern"][key1] = {"ar": value1, "count": 1}
119
  # ---
120
  # continue
 
125
  key_digits = re.search(reg_year, key, re.IGNORECASE)
126
  value_digits = re.search(reg_year, value1, re.IGNORECASE)
127
  # ----
128
+ if not key_digits:
129
+ continue
130
+ # ----
131
+ key2 = key1.replace(key_digits.group(), YEAR_PATTERN)
132
+ # ---
133
+ if key2 in data_lists["categories_with_YEAR_NAT_pattern"]:
134
+ data_lists["categories_with_YEAR_NAT_pattern"][key2]["count"] += 1
135
+ # ---
136
+ if not value_digits:
137
+ continue
138
+ # ----
139
+ value2 = value1.replace(value_digits.group(), YEAR_PATTERN)
140
+ # ----
141
+ if key_digits.group() == value_digits.group():
142
  # ---
143
+ if key2 not in data_lists["categories_with_YEAR_NAT_pattern"]:
 
 
144
  data_lists["categories_with_YEAR_NAT_pattern"][key2] = {"ar": value2, "count": 1}
145
  # ----
146
  continue