"""Build a nationalities dataset from a remote JSON file and push it to the Hub.

The script logs in to Hugging Face with the ``HF_TOKEN`` Colab secret,
downloads ``nats.json``, drops skipped and redundant entries, orders the
remaining records so that names containing more spaces come first, and
uploads the result to ``Ibrahemqasim/nationalities``.
"""

import re

import requests
import tqdm
from datasets import Dataset
from datasets import load_dataset
from google.colab import userdata
from huggingface_hub import login

login(userdata.get('HF_TOKEN'))

nats_url = "https://raw.githubusercontent.com/MrIbrahem/Nationalities/refs/heads/main/nats.json"

# Requests never times out on its own; bound the wait so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT_SECONDS = 30


def _normalize_name(name):
    """Return *name* with hyphens as spaces, "the " removed, and lowercased.

    The "the " removal runs before lowercasing, so it is case-sensitive.
    """
    return name.replace("-", " ").replace("the ", "").lower()


response = requests.get(nats_url, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail with a clear HTTP error instead of an obscure JSON decoding error
# when the server answers with a 4xx/5xx page.
response.raise_for_status()
nationalities = response.json()

data_list = []

# Values of the "nat" field that must not be exported.
skip = [
    "barbadian_2",
    "west india !",
    "democratic republic of the congo",
]

for entry in nationalities["data"]:
    if entry["nat"] in skip:
        continue

    # Drop entries whose "nat" value is just the English country name
    # once both are normalized.
    if _normalize_name(entry["nat"]) == _normalize_name(entry["en"]):
        continue

    # NOTE(review): output "man" is read from input "men" and output "men"
    # from input "mens"; this looks like a deliberate rename -- confirm.
    data_list.append({
        "nat_en": entry["nat"],
        "man": entry["men"],
        "men": entry["mens"],
        "women": entry["women"],
        "womens": entry["womens"],
        "country_en": entry["en"],
        "country_ar": entry["ar"],
    })

# Names with the most spaces first; the sort is stable, so entries with the
# same number of spaces keep their source order.
data_list = sorted(data_list, key=lambda x: -x["nat_en"].count(' '))

print("______________")
print(f"len of nationalities : {len(data_list)}.")

print("____________________________")

# Create the Dataset
dataset = Dataset.from_list(data_list)

# Upload the Dataset to Hugging Face
dataset.push_to_hub("Ibrahemqasim/nationalities")

print("dataset: Ibrahemqasim/nationalities push_to_hub successfully!")