File size: 1,378 Bytes
2363599
 
 
 
 
 
 
 
 
 
 
 
 
 
f15a0b5
 
54fd4e7
39a1891
54fd4e7
 
 
 
f15a0b5
54fd4e7
 
 
39a1891
54fd4e7
 
f15a0b5
 
 
 
 
 
 
 
 
 
2363599
38f99e1
 
2363599
 
 
 
 
 
 
 
 
38f99e1
2363599
ee8f21a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import tqdm
import re
import requests
from huggingface_hub import login
from datasets import Dataset
from datasets import load_dataset
from google.colab import userdata

# Authenticate against the Hugging Face Hub using the token kept in the
# Colab user secrets under the name 'HF_TOKEN'.
login(userdata.get('HF_TOKEN'))

nats_url = "https://raw.githubusercontent.com/MrIbrahem/Nationalities/refs/heads/main/nats.json"
# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() surfaces an HTTP error (404, 5xx, ...) right here instead
# of letting an error page be handed to the JSON parser below.
response = requests.get(nats_url, timeout=30)
response.raise_for_status()
nationalities = response.json()


def _normalize(name):
    """Return *name* with '-' turned into ' ', every "the " removed, then lower-cased.

    The "the " removal runs before lower-casing, so a capitalised "The " is
    not stripped.
    """
    return name.replace("-", " ").replace("the ", "").lower()


data_list = []
# ---
# Values of the "nat" field that are never exported.
skip = [
    "barbadian_2",
    "west india !",
    "democratic republic of the congo",
]
# ---
for entry in nationalities["data"]:
    if entry["nat"] in skip:
        continue
    # ---
    # Drop entries whose nationality name is the same as the English country
    # name once both are normalized.
    if _normalize(entry["nat"]) == _normalize(entry["en"]):
        continue
    # ---
    # NOTE(review): the output columns "man" / "men" are read from the source
    # keys "men" / "mens" -- confirm this offset is intended.
    data_list.append({
        "nat_en": entry["nat"],
        "man": entry["men"],
        "men": entry["mens"],
        "women": entry["women"],
        "womens": entry["womens"],

        "country_en": entry["en"],
        "country_ar": entry["ar"],
    })
# ---
# Order rows so that names containing the most spaces come first. The sort is
# stable, so rows with the same number of spaces keep their original order.
data_list.sort(key=lambda row: row["nat_en"].count(" "), reverse=True)
# ---
print("______________")
print(f"len of nationalities : {len(data_list)}.")
# ---
print("____________________________")
# ---
# Create the Dataset
dataset = Dataset.from_list(data_list)

# Upload the Dataset to Hugging Face
dataset.push_to_hub("Ibrahemqasim/nationalities")
# ---
print("dataset: Ibrahemqasim/nationalities push_to_hub successfully!")