Update fix langlinks.py
Browse files- fix langlinks.py +22 -0
fix langlinks.py
CHANGED
@@ -4,6 +4,7 @@ import json
|
|
4 |
import requests
|
5 |
from huggingface_hub import login
|
6 |
from huggingface_hub import upload_file
|
|
|
7 |
|
8 |
# تسجيل الدخول إلى Hugging Face (استبدل "YOUR_ACCESS_TOKEN" بالتوكن الخاص بك)
|
9 |
login("YOUR_ACCESS_TOKEN")
|
@@ -87,3 +88,24 @@ for x, data_list in data_lists.items():
|
|
87 |
repo_id="Ibrahemqasim/enwiki_to_arwiki_categories", # معرف المستودع
|
88 |
# repo_type="dataset", # نوع المستودع (نستخدم dataset للملفات)
|
89 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import requests
|
5 |
from huggingface_hub import login
|
6 |
from huggingface_hub import upload_file
|
7 |
+
from datasets import Dataset
|
8 |
|
9 |
# تسجيل الدخول إلى Hugging Face (استبدل "YOUR_ACCESS_TOKEN" بالتوكن الخاص بك)
|
10 |
login("YOUR_ACCESS_TOKEN")
|
|
|
88 |
repo_id="Ibrahemqasim/enwiki_to_arwiki_categories", # معرف المستودع
|
89 |
# repo_type="dataset", # نوع المستودع (نستخدم dataset للملفات)
|
90 |
)
|
91 |
+
|
92 |
+
print("____________________________")
|
93 |
+
|
94 |
+
# Maps a key of `data_lists` to the name of the Hugging Face dataset
# repository that table is pushed to (under the "Ibrahemqasim/" namespace).
# Keys missing from this mapping are not published as datasets.
datasets_list = {
    "langlinks": "categories_en2ar",
    "filtered_data": "categories_en2ar_with_years",
    "cats_2000_contry": "categories_en2ar-cats_2000_contry",
    "cats_2000": "categories_en2ar-cats_2000",
}
|
100 |
+
|
101 |
+
# Publish every table that has a configured dataset name as its own
# Hugging Face dataset.
for x, data_list in data_lists.items():
    set_name = datasets_list.get(x)

    # Skip tables with no configured dataset name before doing any work,
    # so no row list is built for data that will never be uploaded.
    if not set_name:
        continue

    # One record per mapping entry: the key goes under "en", the value under "ar".
    rows = [{"en": key, "ar": value} for key, value in data_list.items()]

    # Create the Dataset
    dataset = Dataset.from_list(rows)

    # Upload the Dataset to Hugging Face
    dataset.push_to_hub(f"Ibrahemqasim/{set_name}")
|