Ibrahemqasim committed on
Commit
1c94de7
·
verified ·
1 Parent(s): 10b9478

Update nat_datasets.py

Browse files
Files changed (1) hide show
  1. nat_datasets.py +26 -23
nat_datasets.py CHANGED
@@ -48,27 +48,27 @@ match1_done = 0
48
 
49
 
50
def new_func(value, ar_tab):
    """Return the first nationality form from *ar_tab* that occurs in *value*.

    The forms are probed in a fixed order: ``men``, ``womens``, ``women``,
    ``man``.  Each form is looked up in *ar_tab* and, when it is non-empty
    and is a substring of *value*, it is returned with its placeholder tag.

    Args:
        value: Text to search in.
        ar_tab: Mapping that may hold the keys ``men``, ``womens``,
            ``women`` and ``man``.

    Returns:
        A ``(matched_text, placeholder)`` tuple, or ``("", "")`` when no
        form is found in *value*.
    """
    forms = (
        ("men", "{NAT_MEN}"),
        ("womens", "{NAT_WOMENS}"),
        ("women", "{NAT_WOMEN}"),
        ("man", "{NAT_MAN}"),
    )
    for form, placeholder in forms:
        nationality = ar_tab.get(form, "")
        # Test the form that was just looked up.  The previous code tested
        # the "womens" text in the "women" and "man" branches, which either
        # hid real matches or (when "womens" was empty) matched everything.
        if nationality and nationality in value:
            return nationality, placeholder
    return "", ""
73
 
74
 
@@ -103,8 +103,11 @@ for tab in tqdm.tqdm(data):
103
  if not NAT_PATTERN:
104
  continue
105
  # ---
106
# NOTE: these statements belong to the body of the
# `for tab in tqdm.tqdm(data):` loop; re-indent to the loop level when applying.
# Replace the nationality with its placeholder in the key and in the value.
# `flags=` must be passed by keyword: the fourth positional parameter of
# re.sub is `count`, so a positional re.IGNORECASE silently became count=2
# and the match stayed case-sensitive.
key1 = re.sub(rf'\b{re.escape(en_country)}\b', EN_NAT_PATTERN, key, flags=re.IGNORECASE)
value1 = re.sub(rf'\b{re.escape(ar_country)}\b', NAT_PATTERN, value, flags=re.IGNORECASE)
 
 
 
108
  # ---
109
  # if EN_NAT_PATTERN in key1 and NAT_PATTERN in value1:
110
  # ---
 
48
 
49
 
50
def new_func(value, ar_tab):
    """Locate a nationality form from *ar_tab* inside *value*.

    The forms ``men``, ``womens``, ``women`` and ``man`` are tried in that
    order.  For each non-empty form, a variant with "ال" prepended to
    every space-separated word is tried first, then the bare form.

    Args:
        value: Text to search in.
        ar_tab: Mapping that may hold the keys ``men``, ``womens``,
            ``women`` and ``man``.

    Returns:
        A ``(matched_text, placeholder)`` tuple for the first hit, where
        ``matched_text`` is exactly the variant found in *value*; or
        ``("", "")`` when nothing matches.
    """
    # Insertion order of this dict is the probing order.
    placeholder_by_form = {
        "men": "{NAT_MEN}",
        "womens": "{NAT_WOMENS}",
        "women": "{NAT_WOMEN}",
        "man": "{NAT_MAN}",
    }

    for form, placeholder in placeholder_by_form.items():
        bare = ar_tab.get(form, "")
        if bare:
            # Prefix the first word, then every following word.
            with_article = f"ال{bare}".replace(" ", " ال")
            # The prefixed variant takes priority over the bare one.
            for candidate in (with_article, bare):
                if candidate in value:
                    return candidate, placeholder

    return "", ""
73
 
74
 
 
103
  if not NAT_PATTERN:
104
  continue
105
  # ---
106
# NOTE: these statements belong to the body of the
# `for tab in tqdm.tqdm(data):` loop; re-indent to the loop level when applying.
# Replace the English nationality in the key with its placeholder.  The text
# is padded with one space on each side for the substitution and stripped
# again afterwards.
# `flags=` must be passed by keyword: the fourth positional parameter of
# re.sub is `count`, so a positional re.IGNORECASE silently became count=2
# and the match stayed case-sensitive.
key1 = re.sub(rf'\b{re.escape(en_country)}\b', EN_NAT_PATTERN, f" {key} ", flags=re.IGNORECASE)
key1 = key1.strip()
# ---
# Same substitution for the Arabic nationality in the value.
value1 = re.sub(rf'\b{re.escape(ar_country)}\b', NAT_PATTERN, f" {value} ", flags=re.IGNORECASE)
value1 = value1.strip()
111
  # ---
112
  # if EN_NAT_PATTERN in key1 and NAT_PATTERN in value1:
113
  # ---