import json
import os
import re

import requests
import tqdm
from huggingface_hub import login
from huggingface_hub import upload_file

# Authenticate with the Hugging Face Hub before uploading.
# The token is read from the HF_TOKEN environment variable so the secret does
# not have to be committed to this file; when the variable is unset the
# original placeholder is used, which must be replaced with a real token.
login(os.environ.get("HF_TOKEN", "YOUR_ACCESS_TOKEN"))

# Location of the raw langlinks mapping to be cleaned.
json_url = "https://huggingface.co/Ibrahemqasim/enwiki_to_arwiki_categories/resolve/main/langlinks.json"

# A timeout keeps the script from hanging indefinitely on a stalled connection.
response = requests.get(json_url, timeout=60)
# Fail loudly on an HTTP error rather than trying to parse an error page as JSON.
response.raise_for_status()

data = response.json()

# Turn the downloaded mapping into a list of {"en": ..., "ar": ...} records,
# removing the stray quoting left on keys and values.
data_list = []

for key, value in tqdm.tqdm(data.items()):
    # A key wrapped in double quotes loses the surrounding quotes.
    if key.startswith('"') and key.endswith('"'):
        key = key[1:-1]

    # A value that starts with :" and ends with ", loses both two-character
    # wrappers.
    if value.startswith(':"') and value.endswith('",'):
        value = value[2:-2]

    data_list.append({"en": key, "ar": value})

# Save the cleaned records as UTF-8 JSON. ensure_ascii=False writes non-ASCII
# characters as-is instead of as \uXXXX escapes.
with open("langlinks_fixed.json", "w", encoding="utf-8") as f:
    json.dump(data_list, f, ensure_ascii=False, indent=4)

# Upload the cleaned file to the repository under the same file name.
upload_file(
    path_or_fileobj="langlinks_fixed.json",
    path_in_repo="langlinks_fixed.json",
    repo_id="Ibrahemqasim/enwiki_to_arwiki_categories",
)