diff --git a/data/Abkhaz-Adyghe.json b/data/Abkhaz-Adyghe.json index 86231fd87f608137e0ec3f745a126d6ab2045159..fa2edb3b4d2d733a1117c7ea9f63440223b89bac 100644 --- a/data/Abkhaz-Adyghe.json +++ b/data/Abkhaz-Adyghe.json @@ -2,101 +2,101 @@ "name": "Abkhaz-Adyghe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abkhaz-Abazin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abkhaz", "iso_1_code": "ab", "iso_3_code": "abk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Abaza", "iso_1_code": null, "iso_3_code": "abq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "1", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Circassian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Adyghe", "iso_1_code": null, "iso_3_code": "ady", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Kabardian", "iso_1_code": null, "iso_3_code": "kbd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ubyx", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ubykh", "iso_1_code": null, "iso_3_code": "uby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "0", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Afro-Asiatic.json b/data/Afro-Asiatic.json index 5c6412fd41b839202ce42ead58eb3d3388f76b5d..56e106a7a691805f1f5406834a80038187266aa1 100644 --- a/data/Afro-Asiatic.json +++ b/data/Afro-Asiatic.json @@ -2,7842 +2,7034 @@ "name": "Afro-Asiatic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Copt": { - "full_object": "StanzaTokenizer(\"cop\")", - "original_lang_name": "coptic", - "original_lang_code": "cop", - "scripts": [ - "Copt" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Hebr": { - "full_object": "SpaCyTokenizer(\"he\")", - "original_lang_name": "hebrew", - "original_lang_code": "heb", - "scripts": [ - "Hebr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Berber", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Guanche", "iso_1_code": null, "iso_3_code": "gnc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "11", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awjila-Sokna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awjilah", "iso_1_code": null, "iso_3_code": "auj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "14", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sawknah", "iso_1_code": null, "iso_3_code": "swn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "15", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "13", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Siwi", "iso_1_code": null, "iso_3_code": "siz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "17", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "16", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "12", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Chenoua", "iso_1_code": null, "iso_3_code": "cnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "19", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Atlas", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Judeo-Berber", "iso_1_code": null, "iso_3_code": "jbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "21", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tachelhit", "iso_1_code": null, "iso_3_code": "shi", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "22", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tamazight, Central Atlas", "iso_1_code": null, "iso_3_code": "tzm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "23", + "native_tokenizers": [], "scripts": [ "Tfng" - ], - "own_tokenizer": false + ] }, { "name": "Tamazight, Standard Moroccan", "iso_1_code": null, "iso_3_code": "zgh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "24", + "native_tokenizers": [], "scripts": [ "Tfng" - ], - "own_tokenizer": false + ] } ], - "node_i": "20", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Kabyle", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "20", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Kabyle", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kabyle", "iso_1_code": null, "iso_3_code": "kab", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "26", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "25", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zenati", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ghadam\u00e8s", "iso_1_code": null, "iso_3_code": "gha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "29", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nafusi", "iso_1_code": null, "iso_3_code": "jbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "30", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sened", "iso_1_code": null, "iso_3_code": "sds", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "31", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "28", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ghomara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ghomara", "iso_1_code": null, "iso_3_code": "gho", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "33", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "32", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mzab-Wargla", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Taznatit", "iso_1_code": null, "iso_3_code": "grr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "35", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tumzabt", "iso_1_code": null, "iso_3_code": "mzb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "36", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tagargrent", "iso_1_code": null, "iso_3_code": "oua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "37", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamazight, Temacine", "iso_1_code": null, "iso_3_code": "tjo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "38", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "34", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Riff", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tarifit", "iso_1_code": null, "iso_3_code": "rif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "40", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Senhaja Berber", "iso_1_code": null, "iso_3_code": "sjs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "41", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "39", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shawiya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tachawit", "iso_1_code": null, "iso_3_code": "shy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "43", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "42", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tidikelt", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tamazight, Tidikelt", "iso_1_code": null, "iso_3_code": "tia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "45", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "44", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "27", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "18", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Tamasheq", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "18", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Tamasheq", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tamahaq, Tahaggart", "iso_1_code": null, "iso_3_code": "thv", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "48", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "47", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "47", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Tamasheq", "iso_1_code": null, "iso_3_code": "taq", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "50", + "native_tokenizers": [], "scripts": [ "Latn", "Tfng" - ], - "own_tokenizer": false + ] }, { "name": "Tamajeq, Tayart", "iso_1_code": null, "iso_3_code": "thz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "51", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamajaq, Tawallammat", "iso_1_code": null, "iso_3_code": "ttq", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "52", + "native_tokenizers": [], "scripts": [ "Latn", "Tfng" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "49", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "46", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zenaga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tetserret", "iso_1_code": null, "iso_3_code": "tez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "54", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zenaga", "iso_1_code": null, "iso_3_code": "zen", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "55", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "53", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "10", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Chadic", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "10", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Chadic", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Biu-Mandara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "A.1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boga", "iso_1_code": null, "iso_3_code": "bvw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "61", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ga\u2019anda", "iso_1_code": null, "iso_3_code": "gqa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "62", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hwana", "iso_1_code": null, "iso_3_code": "hwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "63", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "60", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jara", "iso_1_code": null, "iso_3_code": "jaf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "65", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tera", "iso_1_code": null, "iso_3_code": "ttr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "66", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "64", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "59", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "A.2", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nggwahyi", "iso_1_code": null, "iso_3_code": "ngx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "68", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bura-Pabir", "iso_1_code": null, "iso_3_code": "bwr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "70", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kibaku", "iso_1_code": null, "iso_3_code": "ckl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "71", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kofa", "iso_1_code": null, "iso_3_code": "kso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "72", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Putai", "iso_1_code": null, "iso_3_code": "mfl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "73", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "69", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "2", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nya Huba", "iso_1_code": null, "iso_3_code": "hbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "75", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marghi South", "iso_1_code": null, "iso_3_code": "mfm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "76", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marghi Central", "iso_1_code": null, "iso_3_code": "mrt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "77", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "74", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "67", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "A.3", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bana", "iso_1_code": null, "iso_3_code": "bcw", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "79", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kirya-Konzel", "iso_1_code": null, "iso_3_code": "fkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "80", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamwe", "iso_1_code": null, "iso_3_code": "hig", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "81", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Hya", "iso_1_code": null, "iso_3_code": "hya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "82", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Psikye", "iso_1_code": null, "iso_3_code": "kvj", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "83", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "78", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "A.4", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "78", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "A.4", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Lamang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lamang", "iso_1_code": null, "iso_3_code": "hia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "86", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vemgo-Mabas", "iso_1_code": null, "iso_3_code": "vem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "87", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hdi", "iso_1_code": null, "iso_3_code": "xed", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "88", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "85", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mandara Proper", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "85", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mandara Proper", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Glavda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cineni", "iso_1_code": null, "iso_3_code": "cie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "91", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dghwede", "iso_1_code": null, "iso_3_code": "dgh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "92", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guduf-Gava", "iso_1_code": null, "iso_3_code": "gdf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "93", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Glavda", "iso_1_code": null, "iso_3_code": "glw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "94", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gvoko", "iso_1_code": null, "iso_3_code": "ngs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "95", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "90", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Wandala", "iso_1_code": null, "iso_3_code": "mfi", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "97", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "96", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Podoko", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "96", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Podoko", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Parkwa", "iso_1_code": null, "iso_3_code": "pbi", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "99", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "98", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "89", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "84", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "A.5", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "84", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "A.5", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Baldemu", "iso_1_code": null, "iso_3_code": "bdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cuvok", "iso_1_code": null, "iso_3_code": "cuv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "102", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dugwor", "iso_1_code": null, "iso_3_code": "dme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "103", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Giziga, North", "iso_1_code": null, "iso_3_code": "gis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Giziga", "iso_1_code": null, "iso_3_code": "giz", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "105", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zulgo-Gemzek", "iso_1_code": null, "iso_3_code": "gnd", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "106", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mafa", "iso_1_code": null, "iso_3_code": "maf", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "107", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Merey", "iso_1_code": null, "iso_3_code": "meq", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "108", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Matal", "iso_1_code": null, "iso_3_code": "mfh", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "109", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mefele", "iso_1_code": null, "iso_3_code": "mfj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mofu, North", "iso_1_code": null, "iso_3_code": "mfk", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "111", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mofu-Gudur", "iso_1_code": null, "iso_3_code": "mif", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "112", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Vame", "iso_1_code": null, "iso_3_code": "mlr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "113", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moloko", "iso_1_code": null, "iso_3_code": "mlw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbuko", "iso_1_code": null, "iso_3_code": "mqb", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "115", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Muyang", "iso_1_code": null, "iso_3_code": "muy", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "116", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mada", "iso_1_code": null, "iso_3_code": "mxu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wuzlam", "iso_1_code": null, "iso_3_code": "udl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "A.6", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sukur", "iso_1_code": null, "iso_3_code": "syk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "A.7", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Buwal", "iso_1_code": null, "iso_3_code": "bhs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Daba", "iso_1_code": null, "iso_3_code": "dbq", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "123", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mazagway-Hidi", "iso_1_code": null, "iso_3_code": "dkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gavar", "iso_1_code": null, "iso_3_code": "gou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "125", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mina", "iso_1_code": null, "iso_3_code": "hna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbudum", "iso_1_code": null, "iso_3_code": "xmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "121", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "A.8", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "121", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "A.8", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bacama", "iso_1_code": null, "iso_3_code": "bcy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bata", "iso_1_code": null, "iso_3_code": "bta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fali Muchella", "iso_1_code": null, "iso_3_code": "fli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gude", "iso_1_code": null, "iso_3_code": "gde", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "132", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gudu", "iso_1_code": null, "iso_3_code": "gdu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "133", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Holma", "iso_1_code": null, "iso_3_code": "hod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jimjimen", "iso_1_code": null, "iso_3_code": "jim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngwaba", "iso_1_code": null, "iso_3_code": "ngw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "136", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nzanyi", "iso_1_code": null, "iso_3_code": "nja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sharwa", "iso_1_code": null, "iso_3_code": "swq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsuvan", "iso_1_code": null, "iso_3_code": "tsh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "139", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zizilivakan", "iso_1_code": null, "iso_3_code": "ziz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "58", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "B", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "58", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "B", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "B.1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Auyokawa", "iso_1_code": null, "iso_3_code": "auo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "143", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jilbe", "iso_1_code": null, "iso_3_code": "jie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "144", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buduma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Buduma", "iso_1_code": null, "iso_3_code": "bdm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "145", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jina", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jina", "iso_1_code": null, "iso_3_code": "jia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Majera", "iso_1_code": null, "iso_3_code": "xmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "149", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "147", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kotoko Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Afade", "iso_1_code": null, "iso_3_code": "aal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "152", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mpade", "iso_1_code": null, "iso_3_code": "mpi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "153", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maslam", "iso_1_code": null, "iso_3_code": "msv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "154", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malgbe", "iso_1_code": null, "iso_3_code": "mxf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lagwan", "iso_1_code": null, "iso_3_code": "kot", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mser", "iso_1_code": null, "iso_3_code": "kqx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "156", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "150", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "B.2", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Muskum", "iso_1_code": null, "iso_3_code": "mje", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "160", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbara", "iso_1_code": null, "iso_3_code": "mpk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "161", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Musgu", "iso_1_code": null, "iso_3_code": "mug", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "162", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "141", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "C", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "141", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "C", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Gidar", "iso_1_code": null, "iso_3_code": "gid", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "164", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "57", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "East", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "57", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "East", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "A.1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Buso", "iso_1_code": null, "iso_3_code": "bso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mulgi", "iso_1_code": null, "iso_3_code": "mvh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "170", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndam", "iso_1_code": null, "iso_3_code": "ndm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soumraye", "iso_1_code": null, "iso_3_code": "sor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tumak", "iso_1_code": null, "iso_3_code": "tmc", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "173", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "2", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boor", "iso_1_code": null, "iso_3_code": "bvf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "175", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gadang", "iso_1_code": null, "iso_3_code": "gdk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "176", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miltu", "iso_1_code": null, "iso_3_code": "mlj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sarua", "iso_1_code": null, "iso_3_code": "swy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "167", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "A.2", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "167", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "A.2", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kimr\u00e9", "iso_1_code": null, "iso_3_code": "kqp", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "181", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lele", "iso_1_code": null, "iso_3_code": "lln", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "182", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nancere", "iso_1_code": null, "iso_3_code": "nnc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "180", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "2", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gabri", "iso_1_code": null, "iso_3_code": "gab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kabalai", "iso_1_code": null, "iso_3_code": "kvf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "186", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tobanga", "iso_1_code": null, "iso_3_code": "tng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "179", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "A.3", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "179", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "A.3", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kera", "iso_1_code": null, "iso_3_code": "ker", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "189", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kwang", "iso_1_code": null, "iso_3_code": "kvi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "166", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "B", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "166", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "B", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "B.1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bidiyo", "iso_1_code": null, "iso_3_code": "bid", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dangal\u00e9at", "iso_1_code": null, "iso_3_code": "daa", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "195", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Jonkor Bourmataguil", "iso_1_code": null, "iso_3_code": "jeu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "196", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mawa", "iso_1_code": null, "iso_3_code": "mcw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Migaama", "iso_1_code": null, "iso_3_code": "mmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mogum", "iso_1_code": null, "iso_3_code": "mou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mabire", "iso_1_code": null, "iso_3_code": "muj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ubi", "iso_1_code": null, "iso_3_code": "ubi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "193", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "2", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Birgit", "iso_1_code": null, "iso_3_code": "btf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "203", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kajakse", "iso_1_code": null, "iso_3_code": "ckq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masmaje", "iso_1_code": null, "iso_3_code": "mes", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mubi", "iso_1_code": null, "iso_3_code": "mub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toram", "iso_1_code": null, "iso_3_code": "trj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zerenkel", "iso_1_code": null, "iso_3_code": "zrn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "202", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "B.2", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mukulu", "iso_1_code": null, "iso_3_code": "moz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "B.3", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barein", "iso_1_code": null, "iso_3_code": "bva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saba", "iso_1_code": null, "iso_3_code": "saa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "213", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sokoro", "iso_1_code": null, "iso_3_code": "sok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamki", "iso_1_code": null, "iso_3_code": "tax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "165", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Masa", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "165", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Masa", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Herd\u00e9", "iso_1_code": null, "iso_3_code": "hed", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "217", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "P\u00e9v\u00e9", "iso_1_code": null, "iso_3_code": "lme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masana", "iso_1_code": null, "iso_3_code": "mcn", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "219", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Marba", "iso_1_code": null, "iso_3_code": "mpg", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "220", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Musey", "iso_1_code": null, "iso_3_code": "mse", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "221", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngete", "iso_1_code": null, "iso_3_code": "nnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mesme", "iso_1_code": null, "iso_3_code": "zim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zumaya", "iso_1_code": null, "iso_3_code": "zuy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "224", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "216", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "West", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "216", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "West", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "A.1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gwandara", "iso_1_code": null, "iso_3_code": "gwn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hausa", "iso_1_code": "ha", "iso_3_code": "hau", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "229", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "A.2", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bole", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bole Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bole", "iso_1_code": null, "iso_3_code": "bol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "233", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bure", "iso_1_code": null, "iso_3_code": "bvh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "234", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Beele", "iso_1_code": null, "iso_3_code": "bxq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Deno", "iso_1_code": null, "iso_3_code": "dbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Daza", "iso_1_code": null, "iso_3_code": "dzd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Geruma", "iso_1_code": null, "iso_3_code": "gea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "238", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gera", "iso_1_code": null, "iso_3_code": "gew", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Galambi", "iso_1_code": null, "iso_3_code": "glo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Giiwo", "iso_1_code": null, "iso_3_code": "kks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "241", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kubi", "iso_1_code": null, "iso_3_code": "kof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "242", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kholok", "iso_1_code": null, "iso_3_code": "ktc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maaka", "iso_1_code": null, "iso_3_code": "mew", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngamo", "iso_1_code": null, "iso_3_code": "nbh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyam", "iso_1_code": null, "iso_3_code": "nmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "232", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karekare", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karekare", "iso_1_code": null, "iso_3_code": "kai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tangale", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dera", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dera", "iso_1_code": null, "iso_3_code": "kna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tangale Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kutto", "iso_1_code": null, "iso_3_code": "kpa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwaami", "iso_1_code": null, "iso_3_code": "ksq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kushi", "iso_1_code": null, "iso_3_code": "kuh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "255", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pero", "iso_1_code": null, "iso_3_code": "pip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piya-Kwonci", "iso_1_code": null, "iso_3_code": "piy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tangale", "iso_1_code": null, "iso_3_code": "tan", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "252", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "A.3", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Angas Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Jakattoe", "iso_1_code": null, "iso_3_code": "jrt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ngas", "iso_1_code": null, "iso_3_code": "anc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "263", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cakfem-Mushere", "iso_1_code": null, "iso_3_code": "cky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "264", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Belning", "iso_1_code": null, "iso_3_code": "glb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "265", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kofyar", "iso_1_code": null, "iso_3_code": "kwl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miship", "iso_1_code": null, "iso_3_code": "mjs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nteng", "iso_1_code": null, "iso_3_code": "nqt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mwaghavul", "iso_1_code": null, "iso_3_code": "sur", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "269", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "2", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Goemai", "iso_1_code": null, "iso_3_code": "ank", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "271", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koenoem", "iso_1_code": null, "iso_3_code": "kcs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tehl", "iso_1_code": null, "iso_3_code": "mtl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "273", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piapung", "iso_1_code": null, "iso_3_code": "pcw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tal", "iso_1_code": null, "iso_3_code": "tal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "275", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yiwom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ywom", "iso_1_code": null, "iso_3_code": "gek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "277", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "259", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "A.4", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fyer", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fyer", "iso_1_code": null, "iso_3_code": "fie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rom", "iso_1_code": null, "iso_3_code": "tdk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "279", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ron Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ron", "iso_1_code": null, "iso_3_code": "cla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "283", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duhwa", "iso_1_code": null, "iso_3_code": "kbz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "284", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulere", "iso_1_code": null, "iso_3_code": "kul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "285", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mindat", "iso_1_code": null, "iso_3_code": "mmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sya", "iso_1_code": null, "iso_3_code": "scw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "287", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "226", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "B", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "226", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "B", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "B.1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Teshenawa", "iso_1_code": null, "iso_3_code": "twc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "290", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bade Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bade", "iso_1_code": null, "iso_3_code": "bde", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngizim", "iso_1_code": null, "iso_3_code": "ngi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "291", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duwai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Duwai", "iso_1_code": null, "iso_3_code": "dbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "294", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "B.2", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ajawa", "iso_1_code": null, "iso_3_code": "ajw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "297", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burku", "iso_1_code": null, "iso_3_code": "bbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "298", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dirya", "iso_1_code": null, "iso_3_code": "dwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "299", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zibinju", "iso_1_code": null, "iso_3_code": "jmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kariya", "iso_1_code": null, "iso_3_code": "kil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "301", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vune mi", "iso_1_code": null, "iso_3_code": "mkf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pa\u2019anci", "iso_1_code": null, "iso_3_code": "pqa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "303", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siri", "iso_1_code": null, "iso_3_code": "sir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Choogen", "iso_1_code": null, "iso_3_code": "tgd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warji", "iso_1_code": null, "iso_3_code": "wji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "B.3", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dass", "iso_1_code": null, "iso_3_code": "dot", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "308", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Boghom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boghom", "iso_1_code": null, "iso_3_code": "bux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "310", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kir-Balar", "iso_1_code": null, "iso_3_code": "kkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "311", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mansi", "iso_1_code": null, "iso_3_code": "zns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jimi", "iso_1_code": null, "iso_3_code": "jmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "314", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guruntum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Guruntum-Mbaaru", "iso_1_code": null, "iso_3_code": "grd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Juu", "iso_1_code": null, "iso_3_code": "juu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "317", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tala", "iso_1_code": null, "iso_3_code": "tak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zamwal", "iso_1_code": null, "iso_3_code": "zah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "315", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zaar Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Cha\u2019ari", "iso_1_code": null, "iso_3_code": "cxh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dokshi", "iso_1_code": null, "iso_3_code": "dsk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dyarim", "iso_1_code": null, "iso_3_code": "dyr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gyaazi", "iso_1_code": null, "iso_3_code": "gyz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luri", "iso_1_code": null, "iso_3_code": "ldd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dir-Nyamzak-Mbarimi", "iso_1_code": null, "iso_3_code": "nzr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pesse", "iso_1_code": null, "iso_3_code": "pze", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saya", "iso_1_code": null, "iso_3_code": "say", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "328", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tulai", "iso_1_code": null, "iso_3_code": "tvi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buli", "iso_1_code": null, "iso_3_code": "uly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "330", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zari", "iso_1_code": null, "iso_3_code": "zaz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bu", "iso_1_code": null, "iso_3_code": "zbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zeem", "iso_1_code": null, "iso_3_code": "zem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zul", "iso_1_code": null, "iso_3_code": "zlu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "307", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "288", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "56", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Cushitic", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "56", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Cushitic", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Xamtanga", "iso_1_code": null, "iso_3_code": "xan", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bilen", "iso_1_code": null, "iso_3_code": "byn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awngi", "iso_1_code": null, "iso_3_code": "awn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Qimant", "iso_1_code": null, "iso_3_code": "ahg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "344", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "336", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Boon", "iso_1_code": null, "iso_3_code": "bnl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dullay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ale", "iso_1_code": null, "iso_3_code": "gwd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsamai", "iso_1_code": null, "iso_3_code": "tsb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Highland", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Alaba-K\u2019abeena", "iso_1_code": null, "iso_3_code": "alw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burji", "iso_1_code": null, "iso_3_code": "bji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gedeo", "iso_1_code": null, "iso_3_code": "drs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hadiyya", "iso_1_code": null, "iso_3_code": "hdy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kambaata", "iso_1_code": null, "iso_3_code": "ktb", + "children": [], "tokenizers": { "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "355", + "native_tokenizers": [], "scripts": [ "Ethi" - ], - "own_tokenizer": false + ] }, { "name": "Libido", "iso_1_code": null, "iso_3_code": "liq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sidamo", "iso_1_code": null, "iso_3_code": "sid", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "357", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konso-Gidole", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mositacha", "iso_1_code": null, "iso_3_code": "dox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "359", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dirasha", "iso_1_code": null, "iso_3_code": "gdl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konso", "iso_1_code": null, "iso_3_code": "kxc", + "children": [], "tokenizers": { "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "361", + "native_tokenizers": [], "scripts": [ "Ethi" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "358", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oromo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Oromo, Borana-Arsi-Guji", "iso_1_code": "om", "iso_3_code": "gax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "363", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oromo, West Central", "iso_1_code": "om", "iso_3_code": "gaz", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "364", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Oromo, Eastern", "iso_1_code": "om", "iso_3_code": "hae", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "365", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Orma", "iso_1_code": "om", "iso_3_code": "orc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waata", "iso_1_code": null, "iso_3_code": "ssn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "367", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "362", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Rendille-Boni", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "362", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Rendille-Boni", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Aweer", "iso_1_code": null, "iso_3_code": "bob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "369", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rendille", "iso_1_code": null, "iso_3_code": "rel", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "370", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saho-Afar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Afar", "iso_1_code": "aa", "iso_3_code": "aar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "372", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saho", "iso_1_code": null, "iso_3_code": "ssy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "373", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Somali", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dabarre", "iso_1_code": null, "iso_3_code": "dbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "375", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Garre", "iso_1_code": null, "iso_3_code": "gex", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "376", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Girirra", "iso_1_code": null, "iso_3_code": "gii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "377", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jiiddu", "iso_1_code": null, "iso_3_code": "jii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "378", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Somali", "iso_1_code": "so", "iso_3_code": "som", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "379", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tunni", "iso_1_code": null, "iso_3_code": "tqq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maay", "iso_1_code": null, "iso_3_code": "ymm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "374", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Western Omo-Tana", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "374", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Western Omo-Tana", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Arbore", "iso_1_code": null, "iso_3_code": "arv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "383", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baiso", "iso_1_code": null, "iso_3_code": "bsw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "384", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Daasanach", "iso_1_code": null, "iso_3_code": "dsh", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "385", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "El Molo", "iso_1_code": null, "iso_3_code": "elo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaaku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yaaku", "iso_1_code": null, "iso_3_code": "muu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "387", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "345", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bedawiyet", "iso_1_code": null, "iso_3_code": "bej", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "390", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "389", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Aas\u00e1x", "iso_1_code": null, "iso_3_code": "aas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burunge", "iso_1_code": null, "iso_3_code": "bds", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "393", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dahalo", "iso_1_code": null, "iso_3_code": "dal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "394", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gorowa", "iso_1_code": null, "iso_3_code": "gow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "395", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iraqw", "iso_1_code": null, "iso_3_code": "irk", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "396", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Alagwa", "iso_1_code": null, "iso_3_code": "wbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "397", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kw\u2019adza", "iso_1_code": null, "iso_3_code": "wka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Egyptian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Copt": { - "full_object": "StanzaTokenizer(\"cop\")", - "original_lang_name": "coptic", - "original_lang_code": "cop", - "scripts": [ - "Copt" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Coptic", "iso_1_code": null, "iso_3_code": "cop", + "children": [], "tokenizers": { "Copt": { "full_object": "StanzaTokenizer(\"cop\")", "original_lang_name": "coptic", "original_lang_code": "cop", - "scripts": [ - "Copt" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Copt", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "400", - "scripts": [ + "native_tokenizers": [ "Copt" ], - "own_tokenizer": true + "scripts": [ + "Copt" + ] } ], + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "script": "Copt", + "class_name": "StanzaTokenizer" + } + }, "node_i": "399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Omotic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dizoid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dizin", "iso_1_code": null, "iso_3_code": "mdx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "404", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nayi", "iso_1_code": null, "iso_3_code": "noz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sheko", "iso_1_code": null, "iso_3_code": "she", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gonga-Gimojan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gimojan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Janjero", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yemsa", "iso_1_code": null, "iso_3_code": "jnj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "409", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ometo-Gimira", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Chara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tsaara", "iso_1_code": null, "iso_3_code": "cra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "412", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gimira", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bench", "iso_1_code": null, "iso_3_code": "bcq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "415", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "414", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ometo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Male", "iso_1_code": null, "iso_3_code": "mdy", + "children": [], "tokenizers": { "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "417", + "native_tokenizers": [], "scripts": [ "Ethi" - ], - "own_tokenizer": false + ] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dorze", "iso_1_code": null, "iso_3_code": "doz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dawro", "iso_1_code": null, "iso_3_code": "dwr", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "420", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gamo", "iso_1_code": null, "iso_3_code": "gmv", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" }, "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "421", + "native_tokenizers": [], "scripts": [ "Latn", "Ethi" - ], - "own_tokenizer": false + ] }, { "name": "Gofa", "iso_1_code": null, "iso_3_code": "gof", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" }, "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "422", + "native_tokenizers": [], "scripts": [ "Latn", "Ethi" - ], - "own_tokenizer": false + ] }, { "name": "Melo", "iso_1_code": null, "iso_3_code": "mfx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oyda", "iso_1_code": null, "iso_3_code": "oyd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wolaytta", "iso_1_code": null, "iso_3_code": "wal", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" }, "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "425", + "native_tokenizers": [], "scripts": [ "Latn", "Ethi" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kachama-Ganjule", "iso_1_code": null, "iso_3_code": "kcx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koorete", "iso_1_code": null, "iso_3_code": "kqy", + "children": [], "tokenizers": { "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "428", + "native_tokenizers": [], "scripts": [ "Ethi" - ], - "own_tokenizer": false + ] }, { "name": "Zayse", "iso_1_code": null, "iso_3_code": "zay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Basketo", "iso_1_code": null, "iso_3_code": "bst", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "431", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "430", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "416", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "411", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "408", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Gonga", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, + "node_i": "408", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Gonga", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anfillo", "iso_1_code": null, "iso_3_code": "myo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Borna", "iso_1_code": null, "iso_3_code": "bwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kafa", "iso_1_code": null, "iso_3_code": "kbr", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "438", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Shekkacho", "iso_1_code": null, "iso_3_code": "moy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "439", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ganza", "iso_1_code": null, "iso_3_code": "gza", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hozo", "iso_1_code": null, "iso_3_code": "hoz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mawes Aasse", "iso_1_code": null, "iso_3_code": "myf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seze", "iso_1_code": null, "iso_3_code": "sze", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "402", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "South", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [ - { - "name": "Aari", - "iso_1_code": null, - "iso_3_code": "aiw", - "tokenizers": {}, + "node_i": "402", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "South", + "iso_1_code": null, + "iso_3_code": null, + "children": [ + { + "name": "Aari", + "iso_1_code": null, + "iso_3_code": "aiw", "children": [], + "tokenizers": {}, "node_i": "446", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hamer-Banna", "iso_1_code": null, "iso_3_code": "amf", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "447", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dime", "iso_1_code": null, "iso_3_code": "dim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "448", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gayil", "iso_1_code": null, "iso_3_code": "gyl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karo", "iso_1_code": null, "iso_3_code": "kxh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "450", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "445", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "401", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Semitic", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Hebr": { - "full_object": "SpaCyTokenizer(\"he\")", - "original_lang_name": "hebrew", - "original_lang_code": "heb", - "scripts": [ - "Hebr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" }, "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, + "node_i": "401", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Semitic", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Hebr": { - "full_object": "SpaCyTokenizer(\"he\")", - "original_lang_name": "hebrew", - "original_lang_code": "heb", - "scripts": [ - "Hebr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Faifi", "iso_1_code": null, "iso_3_code": "fif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "453", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "R\u0101zi\u1e25\u012b", "iso_1_code": null, "iso_3_code": "rzh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aramaic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Syriac", "iso_1_code": null, "iso_3_code": "syc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "457", + "native_tokenizers": [], "scripts": [ "Syrc" - ], - "own_tokenizer": false + ] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Assyrian Neo-Aramaic", "iso_1_code": null, "iso_3_code": "aii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "460", + "native_tokenizers": [], "scripts": [ "Syrc" - ], - "own_tokenizer": false + ] }, { "name": "Bohtan Neo-Aramaic", "iso_1_code": null, "iso_3_code": "bhn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "461", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Barzani-Sandu Jewish Neo-Aramaic", "iso_1_code": null, "iso_3_code": "bjf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chaldean Neo-Aramaic", "iso_1_code": null, "iso_3_code": "cld", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "H\u00e9rtevin", "iso_1_code": null, "iso_3_code": "hrt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koy Sanjaq Surat", "iso_1_code": null, "iso_3_code": "kqd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Senaya", "iso_1_code": null, "iso_3_code": "syn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jewish Babylonian Aramaic", "iso_1_code": null, "iso_3_code": "tmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "467", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Trans-Zab", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Inter-Zab Jewish Neo-Aramaic", "iso_1_code": null, "iso_3_code": "aij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "469", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hulaul\u00e1", "iso_1_code": null, "iso_3_code": "huy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lishana Deni", "iso_1_code": null, "iso_3_code": "lsd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "471", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lish\u00e1n Noshan", "iso_1_code": null, "iso_3_code": "trg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "472", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mlahs\u00f6", "iso_1_code": null, "iso_3_code": "lhs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "474", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Turoyo", "iso_1_code": null, "iso_3_code": "tru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandaic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Neo-Mandaic", "iso_1_code": null, "iso_3_code": "mid", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "477", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandaic, Classical", "iso_1_code": null, "iso_3_code": "myz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "478", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Western Neo-Aramaic", "iso_1_code": null, "iso_3_code": "amw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samaritan Aramaic", "iso_1_code": null, "iso_3_code": "sam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Hebr": { - "full_object": "SpaCyTokenizer(\"he\")", - "original_lang_name": "hebrew", - "original_lang_code": "heb", - "scripts": [ - "Hebr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Arabic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "StanzaTokenizer(\"mt\")", - "original_lang_name": "maltese", - "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Arabic, Algerian Saharan", "iso_1_code": "ar", "iso_3_code": "aao", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "484", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Tajiki", "iso_1_code": "ar", "iso_3_code": "abh", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "485", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Baharna", "iso_1_code": "ar", "iso_3_code": "abv", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "486", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Mesopotamian", "iso_1_code": "ar", "iso_3_code": "acm", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "487", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Arabic, Ta\u2019izzi-Adeni", "iso_1_code": "ar", "iso_3_code": "acq", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "488", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Hijazi", "iso_1_code": "ar", "iso_3_code": "acw", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "489", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Omani", "iso_1_code": "ar", "iso_3_code": "acx", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "490", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Cypriot", "iso_1_code": "ar", "iso_3_code": "acy", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "491", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Dhofari", "iso_1_code": "ar", "iso_3_code": "adf", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "492", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Tunisian", "iso_1_code": "ar", "iso_3_code": "aeb", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "493", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Arabic, Sa\u2019idi", "iso_1_code": "ar", "iso_3_code": "aec", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "494", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Gulf", "iso_1_code": "ar", "iso_3_code": "afb", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "495", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Judeo-Moroccan", "iso_1_code": null, "iso_3_code": "aju", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "496", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Levantine", "iso_1_code": "ar", "iso_3_code": "apc", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "497", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Arabic, Sudanese", "iso_1_code": "ar", "iso_3_code": "apd", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "498", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Standard", "iso_1_code": "ar", "iso_3_code": "arb", + "children": [], "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "499", + "native_tokenizers": [], "scripts": [ "Arab", "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Arabic, Algerian", "iso_1_code": "ar", "iso_3_code": "arq", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "500", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Arabic, Najdi", "iso_1_code": "ar", "iso_3_code": "ars", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "501", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Arabic, Moroccan", "iso_1_code": "ar", "iso_3_code": "ary", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "502", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Arabic, Egyptian", "iso_1_code": "ar", "iso_3_code": "arz", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "503", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Arabic, Uzbeki", "iso_1_code": "ar", "iso_3_code": "auz", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "504", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Eastern Egyptian Bedawi", "iso_1_code": "ar", "iso_3_code": "avl", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "505", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Hadrami", "iso_1_code": "ar", "iso_3_code": "ayh", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "506", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Libyan", "iso_1_code": "ar", "iso_3_code": "ayl", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "507", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Sanaani", "iso_1_code": "ar", "iso_3_code": "ayn", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "508", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, North Mesopotamian", "iso_1_code": "ar", "iso_3_code": "ayp", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "509", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Arabic, Judeo-Yemeni", "iso_1_code": null, "iso_3_code": "jye", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hassaniyya", "iso_1_code": null, "iso_3_code": "mey", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maltese", "iso_1_code": "mt", "iso_3_code": "mlt", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"mt\")", "original_lang_name": "maltese", "original_lang_code": "mlt", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "512", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Arabic, Chadian", "iso_1_code": "ar", "iso_3_code": "shu", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "513", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Arabic, Shihhi", "iso_1_code": "ar", "iso_3_code": "ssh", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "514", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Judeo-Iraqi", "iso_1_code": null, "iso_3_code": "yhd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Judeo-Tripolitanian", "iso_1_code": null, "iso_3_code": "yud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "483", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Canaanite", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Hebr": { - "full_object": "SpaCyTokenizer(\"he\")", - "original_lang_name": "hebrew", - "original_lang_code": "heb", - "scripts": [ - "Hebr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Hebrew, Ancient", "iso_1_code": null, "iso_3_code": "hbo", + "children": [], "tokenizers": { "Hebr": { "full_object": "StanzaTokenizer(\"hbo\")", "original_lang_name": "ancient_hebrew", "original_lang_code": "hbo", - "scripts": [ - "Hebr" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Hebr", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "518", - "scripts": [ + "native_tokenizers": [ "Hebr" ], - "own_tokenizer": true + "scripts": [ + "Hebr" + ] }, { "name": "Hebrew", "iso_1_code": "he", "iso_3_code": "heb", + "children": [], "tokenizers": { "Hebr": { "full_object": "SpaCyTokenizer(\"he\")", "original_lang_name": "hebrew", "original_lang_code": "heb", - "scripts": [ - "Hebr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Hebr", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "519", - "scripts": [ + "native_tokenizers": [ "Hebr" ], - "own_tokenizer": true + "scripts": [ + "Hebr" + ] }, { "name": "Samaritan Hebrew", "iso_1_code": null, "iso_3_code": "smp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "script": "Hebr", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "script": "Hebr", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "script": "Hebr", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ethiopian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Ethi": { - "full_object": "SpaCyTokenizer(\"ti\")", - "original_lang_name": "tigrinya", - "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dahalik", "iso_1_code": null, "iso_3_code": "dlk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Geez", "iso_1_code": null, "iso_3_code": "gez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tigr\u00e9", "iso_1_code": null, "iso_3_code": "tig", + "children": [], "tokenizers": { "Ethi": { "full_object": "SpaCyTokenizer(\"ti\")", "original_lang_name": "tigrinya", "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "526", + "native_tokenizers": [], "scripts": [ "Ethi" - ], - "own_tokenizer": false + ] }, { "name": "Tigrigna", "iso_1_code": "ti", "iso_3_code": "tir", + "children": [], "tokenizers": { "Ethi": { "full_object": "SpaCyTokenizer(\"ti\")", "original_lang_name": "tigrinya", "original_lang_code": "tir", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "527", - "scripts": [ + "native_tokenizers": [ "Ethi" ], - "own_tokenizer": true + "scripts": [ + "Ethi" + ] } ], + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"ti\")", + "original_lang_name": "tigrinya", + "original_lang_code": "tir", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "523", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Ethi": { - "full_object": "SpaCyTokenizer(\"am\")", - "original_lang_name": "amharic", - "original_lang_code": "amh", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Outer", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Ethi": { - "full_object": "SpaCyTokenizer(\"am\")", - "original_lang_name": "amharic", - "original_lang_code": "amh", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "n-Group", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gafat", "iso_1_code": null, "iso_3_code": "gft", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kistane", "iso_1_code": null, "iso_3_code": "gru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "tt-Group", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Ethi": { - "full_object": "SpaCyTokenizer(\"am\")", - "original_lang_name": "amharic", - "original_lang_code": "amh", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Inor", "iso_1_code": null, "iso_3_code": "ior", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "534", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mesqan", "iso_1_code": null, "iso_3_code": "mvz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "535", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mesmes", "iso_1_code": null, "iso_3_code": "mys", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "536", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sebat Bet Gurage", "iso_1_code": null, "iso_3_code": "sgw", + "children": [], "tokenizers": { "Ethi": { "full_object": "SpaCyTokenizer(\"am\")", "original_lang_name": "amharic", "original_lang_code": "amh", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "537", + "native_tokenizers": [], "scripts": [ "Ethi" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "529", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Transversal", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Ethi": { "full_object": "SpaCyTokenizer(\"am\")", "original_lang_name": "amharic", "original_lang_code": "amh", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, + "node_i": "529", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Transversal", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Amharic-Argobba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Ethi": { - "full_object": "SpaCyTokenizer(\"am\")", - "original_lang_name": "amharic", - "original_lang_code": "amh", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Argobba", "iso_1_code": null, "iso_3_code": "agj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amharic", "iso_1_code": "am", "iso_3_code": "amh", + "children": [], "tokenizers": { "Ethi": { "full_object": "SpaCyTokenizer(\"am\")", "original_lang_name": "amharic", "original_lang_code": "amh", - "scripts": [ - "Ethi" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Ethi", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "541", - "scripts": [ + "native_tokenizers": [ "Ethi" ], - "own_tokenizer": true + "scripts": [ + "Ethi" + ] } ], + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Harari-East Gurage", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Harari", "iso_1_code": null, "iso_3_code": "har", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Silt\u2019e", "iso_1_code": null, "iso_3_code": "stv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "544", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wolane", "iso_1_code": null, "iso_3_code": "wle", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zay", "iso_1_code": null, "iso_3_code": "zwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "546", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "542", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "538", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Arabian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bathari", "iso_1_code": null, "iso_3_code": "bhm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "548", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mehri", "iso_1_code": null, "iso_3_code": "gdq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hoby\u00f3t", "iso_1_code": null, "iso_3_code": "hoh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "550", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Harsusi", "iso_1_code": null, "iso_3_code": "hss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "551", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shehri", "iso_1_code": null, "iso_3_code": "shv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soqotri", "iso_1_code": null, "iso_3_code": "sqt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "553", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "script": "Hebr", + "class_name": "SpaCyTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ongota", "iso_1_code": null, "iso_3_code": "bxe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Copt": { + "full_object": "StanzaTokenizer(\"cop\")", + "original_lang_name": "coptic", + "original_lang_code": "cop", + "script": "Copt", + "class_name": "StanzaTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"mt\")", + "original_lang_name": "maltese", + "original_lang_code": "mlt", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Hebr": { + "full_object": "SpaCyTokenizer(\"he\")", + "original_lang_name": "hebrew", + "original_lang_code": "heb", + "script": "Hebr", + "class_name": "SpaCyTokenizer" + }, + "Ethi": { + "full_object": "SpaCyTokenizer(\"am\")", + "original_lang_name": "amharic", + "original_lang_code": "amh", + "script": "Ethi", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "9", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Algic.json b/data/Algic.json index d211f14007af7dd424db5852b2c158d363f1dcd0..d50bab1c7ae72539ea64cb34d17c4ec8da0098e0 100644 --- a/data/Algic.json +++ b/data/Algic.json @@ -2,630 +2,630 @@ "name": "Algic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Algonquian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Blackfoot", "iso_1_code": null, "iso_3_code": "bla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "558", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cheyenne", "iso_1_code": null, "iso_3_code": "chy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "559", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Menominee", "iso_1_code": null, "iso_3_code": "mez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "560", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miami", "iso_1_code": null, "iso_3_code": "mia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nawathinehena", "iso_1_code": null, "iso_3_code": "nwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shawnee", "iso_1_code": null, "iso_3_code": "sjw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arapaho", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arapaho", "iso_1_code": null, "iso_3_code": "arp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "565", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gros Ventre", "iso_1_code": null, "iso_3_code": "ats", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "566", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "564", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cree-Montagnais", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Atikamekw", "iso_1_code": null, "iso_3_code": "atj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "568", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cree, Southern East", "iso_1_code": "cr", "iso_3_code": "crj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "569", + "native_tokenizers": [], "scripts": [ "Cans" - ], - "own_tokenizer": false + ] }, { "name": "Cree, Plains", "iso_1_code": "cr", "iso_3_code": "crk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "570", + "native_tokenizers": [], "scripts": [ "Latn", "Cans" - ], - "own_tokenizer": false + ] }, { "name": "Cree, Northern East", "iso_1_code": "cr", "iso_3_code": "crl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "571", + "native_tokenizers": [], "scripts": [ "Cans" - ], - "own_tokenizer": false + ] }, { "name": "Cree, Moose", "iso_1_code": "cr", "iso_3_code": "crm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "572", + "native_tokenizers": [], "scripts": [ "Cans" - ], - "own_tokenizer": false + ] }, { "name": "Cree, Swampy", "iso_1_code": "cr", "iso_3_code": "csw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "573", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cree, Woods", "iso_1_code": "cr", "iso_3_code": "cwd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "574", + "native_tokenizers": [], "scripts": [ "Cans" - ], - "own_tokenizer": false + ] }, { "name": "Innu", "iso_1_code": null, "iso_3_code": "moe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naskapi", "iso_1_code": null, "iso_3_code": "nsk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "576", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "567", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Algonquian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Carolina Algonquian", "iso_1_code": null, "iso_3_code": "crr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "578", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Etchemin", "iso_1_code": null, "iso_3_code": "etc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mi\u2019kmaq", "iso_1_code": null, "iso_3_code": "mic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "580", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Powhatan", "iso_1_code": null, "iso_3_code": "pim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malecite-Passamaquoddy", "iso_1_code": null, "iso_3_code": "pqm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "582", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quiripi", "iso_1_code": null, "iso_3_code": "qyp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "583", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wampanoag", "iso_1_code": null, "iso_3_code": "wam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "584", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loup B", "iso_1_code": null, "iso_3_code": "xlb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "585", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loup A", "iso_1_code": null, "iso_3_code": "xlo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "586", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Narragansett", "iso_1_code": null, "iso_3_code": "xnt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "587", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mohegan-Pequot", "iso_1_code": null, "iso_3_code": "xpq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Abenaki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abenaki, Eastern", "iso_1_code": null, "iso_3_code": "aaq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Abenaki, Western", "iso_1_code": null, "iso_3_code": "abe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "589", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Delaware", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mahican", "iso_1_code": null, "iso_3_code": "mjy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "593", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munsee", "iso_1_code": null, "iso_3_code": "umu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unami", "iso_1_code": null, "iso_3_code": "unm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "595", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nanticoke-Conoy", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nanticoke", "iso_1_code": null, "iso_3_code": "nnt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "597", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piscataway", "iso_1_code": null, "iso_3_code": "psy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "596", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "577", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fox", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kickapoo", "iso_1_code": null, "iso_3_code": "kic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Meskwaki", "iso_1_code": null, "iso_3_code": "sac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "601", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ojibwa-Potawatomi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Algonquin", "iso_1_code": null, "iso_3_code": "alq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "603", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chippewa", "iso_1_code": "oj", "iso_3_code": "ciw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ojibwa, Northwestern", "iso_1_code": "oj", "iso_3_code": "ojb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "605", + "native_tokenizers": [], "scripts": [ - "Latn", - "Cans" - ], - "own_tokenizer": false + "Cans", + "Latn" + ] }, { "name": "Ojibwa, Central", "iso_1_code": "oj", "iso_3_code": "ojc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ojibwa, Eastern", "iso_1_code": "oj", "iso_3_code": "ojg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oji-Cree", "iso_1_code": "oj", "iso_3_code": "ojs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "608", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ojibwa, Western", "iso_1_code": "oj", "iso_3_code": "ojw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ottawa", "iso_1_code": "oj", "iso_3_code": "otw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "610", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Potawatomi", "iso_1_code": null, "iso_3_code": "pot", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "611", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "602", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ritwan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wiyot", "iso_1_code": null, "iso_3_code": "wiy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "613", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yurok", "iso_1_code": null, "iso_3_code": "yur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "614", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "556", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Amto-Musan.json b/data/Amto-Musan.json index 92566c1ba7029cbfb5c9bbc20cc8ee89d46f96d0..4b4642a906e7ae8605121a52c0c9bb72582443d7 100644 --- a/data/Amto-Musan.json +++ b/data/Amto-Musan.json @@ -2,30 +2,30 @@ "name": "Amto-Musan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amto", "iso_1_code": null, "iso_3_code": "amt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "616", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siawi", "iso_1_code": null, "iso_3_code": "mmp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "617", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Andamanese.json b/data/Andamanese.json index e85ec8b4e58308caa73f52a6ab002de1cdb6bb31..71dfdf3055dcfe2e23bb159745e5f4b99bbd50ba 100644 --- a/data/Andamanese.json +++ b/data/Andamanese.json @@ -2,194 +2,194 @@ "name": "Andamanese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Great Andamanese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Great Andamanese, Mixed", "iso_1_code": null, "iso_3_code": "gac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aka-Bea", "iso_1_code": null, "iso_3_code": "abj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "622", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Akar-Bale", "iso_1_code": null, "iso_3_code": "acl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "623", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aka-Kede", "iso_1_code": null, "iso_3_code": "akx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "624", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aka-Kol", "iso_1_code": null, "iso_3_code": "aky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "A-Pucikwar", "iso_1_code": null, "iso_3_code": "apq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oko-Juwoi", "iso_1_code": null, "iso_3_code": "okj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "627", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aka-Cari", "iso_1_code": null, "iso_3_code": "aci", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "629", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aka-Kora", "iso_1_code": null, "iso_3_code": "ack", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aka-Jeru", "iso_1_code": null, "iso_3_code": "akj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aka-Bo", "iso_1_code": null, "iso_3_code": "akm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Andamanese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jarawa", "iso_1_code": null, "iso_3_code": "anq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "\u00d6\u00f1ge", "iso_1_code": null, "iso_3_code": "oon", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sentinel", "iso_1_code": null, "iso_3_code": "std", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "633", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Arafundi.json b/data/Arafundi.json index fcb0598f3748c47ac82f422e8eab550afddac102..d9346dfe62024b7e51cb32a8c0d8a9fe00279f0d 100644 --- a/data/Arafundi.json +++ b/data/Arafundi.json @@ -2,40 +2,40 @@ "name": "Arafundi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Andai", "iso_1_code": null, "iso_3_code": "afd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "638", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nanubae", "iso_1_code": null, "iso_3_code": "afk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tapei", "iso_1_code": null, "iso_3_code": "afp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "637", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Arai (Left May).json b/data/Arai (Left May).json index ef0f39864c0bc3f729bb494b0ce828e33dc2c878..ae8423a138bf040a9e2ed969f8fa93b366a27f2d 100644 --- a/data/Arai (Left May).json +++ b/data/Arai (Left May).json @@ -2,72 +2,72 @@ "name": "Arai (Left May)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sawiyanu", "iso_1_code": null, "iso_3_code": "amm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "642", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bo", "iso_1_code": null, "iso_3_code": "bpw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yawuno Teneyo", "iso_1_code": null, "iso_3_code": "itr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nakwi", "iso_1_code": null, "iso_3_code": "nax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nimo", "iso_1_code": null, "iso_3_code": "niw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "646", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Owiniga", "iso_1_code": null, "iso_3_code": "owi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Arauan.json b/data/Arauan.json index 662983b7dc3928d2b2c907ca9a8a4251340010be..0ba5f8d4e224d425adeaa3583d3f882e4896c3b6 100644 --- a/data/Arauan.json +++ b/data/Arauan.json @@ -2,87 +2,87 @@ "name": "Arauan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aru\u00e1", "iso_1_code": null, "iso_3_code": "aru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulina", "iso_1_code": null, "iso_3_code": "cul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "650", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Den\u00ed", "iso_1_code": null, "iso_3_code": "dny", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paumar\u00ed", "iso_1_code": null, "iso_3_code": "pad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "652", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Suruah\u00e1", "iso_1_code": null, "iso_3_code": "swx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jamamadi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jamamad\u00ed", "iso_1_code": null, "iso_3_code": "jaa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "655", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Australian.json b/data/Australian.json index cd3a678d44d65d56419a1c41bb3556462dc1969e..9fe7c8ac8ca0163d0f831055778c5d862ebcc751 100644 --- a/data/Australian.json +++ b/data/Australian.json @@ -2,5230 +2,5230 @@ "name": "Australian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lanima", "iso_1_code": null, "iso_3_code": "lnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bunaban", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bunuba", "iso_1_code": null, "iso_3_code": "bck", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gooniyandi", "iso_1_code": null, "iso_3_code": "gni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Daly", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bringen-Wagaydy", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bringen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Marrithiyel", "iso_1_code": null, "iso_3_code": "mfr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maridan", "iso_1_code": null, "iso_3_code": "zmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marti Ke", "iso_1_code": null, "iso_3_code": "zmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maridjabin", "iso_1_code": null, "iso_3_code": "zmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marimanindji", "iso_1_code": null, "iso_3_code": "zmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maringarr", "iso_1_code": null, "iso_3_code": "zmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mariyedi", "iso_1_code": null, "iso_3_code": "zmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wagaydy", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ami", "iso_1_code": null, "iso_3_code": "amy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Giyug", "iso_1_code": null, "iso_3_code": "giy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wadjiginy", "iso_1_code": null, "iso_3_code": "wdj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manda", "iso_1_code": null, "iso_3_code": "zma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maranunggu", "iso_1_code": null, "iso_3_code": "zmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagmalag", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Daly Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamu", "iso_1_code": null, "iso_3_code": "xmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matngala", "iso_1_code": null, "iso_3_code": "zml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagmalag Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Malak Malak", "iso_1_code": null, "iso_3_code": "mpb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "682", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuwema", "iso_1_code": null, "iso_3_code": "woa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "683", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marriammu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Marriammu", "iso_1_code": null, "iso_3_code": "xru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "684", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murrinh-Patha", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Murrinh-Patha", "iso_1_code": null, "iso_3_code": "mwf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "687", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngan\u2019gityemerri", "iso_1_code": null, "iso_3_code": "nam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "688", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "686", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Djamindjungan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Djamindjung", "iso_1_code": null, "iso_3_code": "djd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "690", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nungali", "iso_1_code": null, "iso_3_code": "nug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "691", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Djeragan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gadjerawang", "iso_1_code": null, "iso_3_code": "gdh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kija", "iso_1_code": null, "iso_3_code": "gia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miriwoong", "iso_1_code": null, "iso_3_code": "mep", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Giimbiyu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Erre", "iso_1_code": null, "iso_3_code": "err", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Urningangg", "iso_1_code": null, "iso_3_code": "urc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangerr", "iso_1_code": null, "iso_3_code": "zme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gunwingguan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Burarran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Burarra", "iso_1_code": null, "iso_3_code": "bvr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "702", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Djeebbana", "iso_1_code": null, "iso_3_code": "djj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gurr-goni", "iso_1_code": null, "iso_3_code": "gge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Na-kara", "iso_1_code": null, "iso_3_code": "nck", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Djauanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Djauan", "iso_1_code": null, "iso_3_code": "djn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Enindhilyagwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anindilyakwa", "iso_1_code": null, "iso_3_code": "aoi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "709", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngandi", "iso_1_code": null, "iso_3_code": "nid", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "710", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nunggubuyu", "iso_1_code": null, "iso_3_code": "nuy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "711", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "708", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gagudjuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gagadu", "iso_1_code": null, "iso_3_code": "gbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "712", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gungaraganyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kungarakany", "iso_1_code": null, "iso_3_code": "ggk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "715", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "714", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gunwinggic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gunwinggu", "iso_1_code": null, "iso_3_code": "gup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kunbarlang", "iso_1_code": null, "iso_3_code": "wlg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "718", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangarayic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mangarrayi", "iso_1_code": null, "iso_3_code": "mpc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "720", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "719", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yugul", "iso_1_code": null, "iso_3_code": "ygu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "722", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alawic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alawa", "iso_1_code": null, "iso_3_code": "alh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "724", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "723", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Marra", "iso_1_code": null, "iso_3_code": "mec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "726", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wandarang", "iso_1_code": null, "iso_3_code": "wnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "725", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "721", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngalkbun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dalabon", "iso_1_code": null, "iso_3_code": "ngk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "728", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rembargic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngalakgan", "iso_1_code": null, "iso_3_code": "nig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rembarrnga", "iso_1_code": null, "iso_3_code": "rmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wagiman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wagiman", "iso_1_code": null, "iso_3_code": "waq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Waray", "iso_1_code": null, "iso_3_code": "wrz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "736", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yangmanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dagoman", "iso_1_code": null, "iso_3_code": "dgn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yangman", "iso_1_code": null, "iso_3_code": "jng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wardaman", "iso_1_code": null, "iso_3_code": "wrr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "737", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laragiyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Laragia", "iso_1_code": null, "iso_3_code": "lrg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Limilngan-Wulna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Limilngan", "iso_1_code": null, "iso_3_code": "lmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "744", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wulna", "iso_1_code": null, "iso_3_code": "wux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "743", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyulnyulan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bardi", "iso_1_code": null, "iso_3_code": "bcj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Djawi", "iso_1_code": null, "iso_3_code": "djw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dyaberdyaber", "iso_1_code": null, "iso_3_code": "dyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dyugun", "iso_1_code": null, "iso_3_code": "dyd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nimanbur", "iso_1_code": null, "iso_3_code": "nmp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyikina", "iso_1_code": null, "iso_3_code": "nyh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyulnyul", "iso_1_code": null, "iso_3_code": "nyv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warrwa", "iso_1_code": null, "iso_3_code": "wwr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngumbarl", "iso_1_code": null, "iso_3_code": "xnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "755", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yawuru", "iso_1_code": null, "iso_3_code": "ywr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "746", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pama-Nyungan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bidhawal", "iso_1_code": null, "iso_3_code": "ihw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "758", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurnai", "iso_1_code": null, "iso_3_code": "unn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "759", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Keerray-Woorroong", "iso_1_code": null, "iso_3_code": "wkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "760", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bindal", "iso_1_code": null, "iso_3_code": "xbd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "761", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bigambal", "iso_1_code": null, "iso_3_code": "xbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "762", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yuyu", "iso_1_code": null, "iso_3_code": "yxu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arandic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Andegerebinha", "iso_1_code": null, "iso_3_code": "adg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "765", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arrernte, Eastern", "iso_1_code": null, "iso_3_code": "aer", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "766", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Alyawarr", "iso_1_code": null, "iso_3_code": "aly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "767", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Anmatyerr", "iso_1_code": null, "iso_3_code": "amx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "768", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Arrarnta, Western", "iso_1_code": null, "iso_3_code": "are", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "769", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ayerrerenge", "iso_1_code": null, "iso_3_code": "axe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "770", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aranda, Lower Southern", "iso_1_code": null, "iso_3_code": "axl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "771", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaytetye", "iso_1_code": null, "iso_3_code": "gbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baagandji", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Paakantyi", "iso_1_code": null, "iso_3_code": "drl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "774", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "773", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bandjalangic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bandjalang", "iso_1_code": null, "iso_3_code": "bdy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Githabul", "iso_1_code": null, "iso_3_code": "gih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "777", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arakwal", "iso_1_code": null, "iso_3_code": "rkw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "778", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minjungbal", "iso_1_code": null, "iso_3_code": "xjb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "779", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bungandidj", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gunditjmara", "iso_1_code": null, "iso_3_code": "gjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bunganditj", "iso_1_code": null, "iso_3_code": "xbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "780", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Durubulic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jandai", "iso_1_code": null, "iso_3_code": "jan", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "784", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nunukul", "iso_1_code": null, "iso_3_code": "xnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yagara", "iso_1_code": null, "iso_3_code": "yxg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "786", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dyangadi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dyangadi", "iso_1_code": null, "iso_3_code": "dyn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "788", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nganyaywana", "iso_1_code": null, "iso_3_code": "nyx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dyirbalic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dyirbal", "iso_1_code": null, "iso_3_code": "dbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yuru", "iso_1_code": null, "iso_3_code": "ljx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "792", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyawaygi", "iso_1_code": null, "iso_3_code": "nyt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "793", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warrgamay", "iso_1_code": null, "iso_3_code": "wgy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "794", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Flinders Island", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Flinders Island", "iso_1_code": null, "iso_3_code": "fln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "796", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Galgadungic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kalkutung", "iso_1_code": null, "iso_3_code": "ktg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wakabunga", "iso_1_code": null, "iso_3_code": "wwb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yalarnnga", "iso_1_code": null, "iso_3_code": "ylr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "800", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "797", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gumbaynggiric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kumbainggar", "iso_1_code": null, "iso_3_code": "kgs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaygir", "iso_1_code": null, "iso_3_code": "xya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guugu-Yimidhirr", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barrow Point", "iso_1_code": null, "iso_3_code": "bpt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guugu Yimidhirr", "iso_1_code": null, "iso_3_code": "kky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "806", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "804", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kala Lagaw Ya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kala Lagaw Ya", "iso_1_code": null, "iso_3_code": "mwp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "808", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "807", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karnic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karenggapa", "iso_1_code": null, "iso_3_code": "eaa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kungardutyi", "iso_1_code": null, "iso_3_code": "gdt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "811", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nhirrpi", "iso_1_code": null, "iso_3_code": "hrp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kungkari", "iso_1_code": null, "iso_3_code": "lku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "813", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngantangarra", "iso_1_code": null, "iso_3_code": "ntg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "814", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karuwali", "iso_1_code": null, "iso_3_code": "rxw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wangkayutyuru", "iso_1_code": null, "iso_3_code": "wky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "816", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pirriya", "iso_1_code": null, "iso_3_code": "xpa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "817", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yarluyandi", "iso_1_code": null, "iso_3_code": "yry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mayawali", "iso_1_code": null, "iso_3_code": "yxa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pirlatapa", "iso_1_code": null, "iso_3_code": "bxi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Diyari", "iso_1_code": null, "iso_3_code": "dif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "822", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngamini", "iso_1_code": null, "iso_3_code": "nmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yandruwandha", "iso_1_code": null, "iso_3_code": "ynd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yawarawarga", "iso_1_code": null, "iso_3_code": "yww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "825", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngura", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Garlali", "iso_1_code": null, "iso_3_code": "gll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Badjiri", "iso_1_code": null, "iso_3_code": "jbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punthamara", "iso_1_code": null, "iso_3_code": "xpt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wangkumara", "iso_1_code": null, "iso_3_code": "xwk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arabana", "iso_1_code": null, "iso_3_code": "ard", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "832", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pitta Pitta", "iso_1_code": null, "iso_3_code": "pit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wangkangurru", "iso_1_code": null, "iso_3_code": "wgg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wanggamala", "iso_1_code": null, "iso_3_code": "wnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "809", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Daungwurrung", "iso_1_code": null, "iso_3_code": "dgw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Djadjawurrung", "iso_1_code": null, "iso_3_code": "dja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "838", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Madhi Madhi", "iso_1_code": null, "iso_3_code": "dmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ladji Ladji", "iso_1_code": null, "iso_3_code": "llj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nari Nari", "iso_1_code": null, "iso_3_code": "rnr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "841", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Djabwurrung", "iso_1_code": null, "iso_3_code": "tjw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wergaia", "iso_1_code": null, "iso_3_code": "weg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wathawurrung", "iso_1_code": null, "iso_3_code": "wth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "844", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Woiwurrung", "iso_1_code": null, "iso_3_code": "wyi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wadi Wadi", "iso_1_code": null, "iso_3_code": "xwd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wemba Wemba", "iso_1_code": null, "iso_3_code": "xww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barababaraba", "iso_1_code": null, "iso_3_code": "rbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "836", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lardil", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lardil", "iso_1_code": null, "iso_3_code": "lbz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bidjara", "iso_1_code": null, "iso_3_code": "bym", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "853", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Biri", "iso_1_code": null, "iso_3_code": "bzr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dhungaloo", "iso_1_code": null, "iso_3_code": "dhx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "855", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gugu Badhun", "iso_1_code": null, "iso_3_code": "gdc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gangulu", "iso_1_code": null, "iso_3_code": "gnl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "857", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guwamu", "iso_1_code": null, "iso_3_code": "gwu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "858", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gungabula", "iso_1_code": null, "iso_3_code": "gyf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "859", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gunya", "iso_1_code": null, "iso_3_code": "gyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "860", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kunggari", "iso_1_code": null, "iso_3_code": "kgl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "861", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yirandali", "iso_1_code": null, "iso_3_code": "ljw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wadjigu", "iso_1_code": null, "iso_3_code": "wdu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wadjabangayi", "iso_1_code": null, "iso_3_code": "wdy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warungu", "iso_1_code": null, "iso_3_code": "wrg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "865", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lower Burdekin", "iso_1_code": null, "iso_3_code": "xbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "866", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Garingbal", "iso_1_code": null, "iso_3_code": "xgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dharumbal", "iso_1_code": null, "iso_3_code": "xgm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guwa", "iso_1_code": null, "iso_3_code": "xgw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wotjobaluk", "iso_1_code": null, "iso_3_code": "xwt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yandjibara", "iso_1_code": null, "iso_3_code": "xyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "871", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yanda", "iso_1_code": null, "iso_3_code": "yda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "872", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yiningayi", "iso_1_code": null, "iso_3_code": "ygi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "873", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Margany", "iso_1_code": null, "iso_3_code": "zmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandandanyi", "iso_1_code": null, "iso_3_code": "zmk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "875", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "852", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbara", "iso_1_code": null, "iso_3_code": "mvl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "877", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muruwaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Muruwari", "iso_1_code": null, "iso_3_code": "zmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "879", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "878", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngarinyeric-Yithayithic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dadi Dadi", "iso_1_code": null, "iso_3_code": "dda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "881", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngarrindjeri", "iso_1_code": null, "iso_3_code": "nay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "882", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngayawung", "iso_1_code": null, "iso_3_code": "nwg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "883", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "880", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pallanganmiddang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dhudhuroa", "iso_1_code": null, "iso_3_code": "ddr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "885", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pallanganmiddang", "iso_1_code": null, "iso_3_code": "pmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "886", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "884", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Olkol", "iso_1_code": null, "iso_3_code": "olk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "888", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tagalaka", "iso_1_code": null, "iso_3_code": "tgz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "889", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gudang", "iso_1_code": null, "iso_3_code": "xgd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "890", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yatay", "iso_1_code": null, "iso_3_code": "yty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "891", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yinwum", "iso_1_code": null, "iso_3_code": "yxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "892", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kunjen", "iso_1_code": null, "iso_3_code": "kjn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "894", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "893", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Coastal Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Koko-Bera", "iso_1_code": null, "iso_3_code": "kkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "896", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "895", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Flinders Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gugadj", "iso_1_code": null, "iso_3_code": "ggd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "897", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamalamic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lamalama", "iso_1_code": null, "iso_3_code": "lby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Morrobalama", "iso_1_code": null, "iso_3_code": "umg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "901", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mayabic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngawun", "iso_1_code": null, "iso_3_code": "nxn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "903", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wunumara", "iso_1_code": null, "iso_3_code": "wnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "904", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mayaguduna", "iso_1_code": null, "iso_3_code": "xmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "905", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mayi-Yapi", "iso_1_code": null, "iso_3_code": "xyj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mayi-Kulan", "iso_1_code": null, "iso_3_code": "xyk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mayi-Thakurti", "iso_1_code": null, "iso_3_code": "xyt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "908", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "902", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbariman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gugu Warra", "iso_1_code": null, "iso_3_code": "wrw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "910", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbariman-Gudhinma", "iso_1_code": null, "iso_3_code": "zmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "911", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "909", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Middle Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ayabadhu", "iso_1_code": null, "iso_3_code": "ayd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pakanha", "iso_1_code": null, "iso_3_code": "pkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "914", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuku-Ugbanh", "iso_1_code": null, "iso_3_code": "ugb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuku-Uwanh", "iso_1_code": null, "iso_3_code": "uwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wik-Epa", "iso_1_code": null, "iso_3_code": "wie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wik-Keyangan", "iso_1_code": null, "iso_3_code": "wif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wik Ngathan", "iso_1_code": null, "iso_3_code": "wig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wik-Me\u2019anha", "iso_1_code": null, "iso_3_code": "wih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wik-Iiyanh", "iso_1_code": null, "iso_3_code": "wij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wikalkan", "iso_1_code": null, "iso_3_code": "wik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wik-Mungkan", "iso_1_code": null, "iso_3_code": "wim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "923", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wikngenchera", "iso_1_code": null, "iso_3_code": "wua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "924", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kugu-Muminh", "iso_1_code": null, "iso_3_code": "xmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "925", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuku-Mu\u2019inh", "iso_1_code": null, "iso_3_code": "xmp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "926", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuku-Mangk", "iso_1_code": null, "iso_3_code": "xmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "912", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Norman Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Areba", "iso_1_code": null, "iso_3_code": "aea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "929", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gurdjar", "iso_1_code": null, "iso_3_code": "gdj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "930", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuthant", "iso_1_code": null, "iso_3_code": "xut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "928", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northeastern Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kanju", "iso_1_code": null, "iso_3_code": "kbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "933", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuuku-Ya\u2019u", "iso_1_code": null, "iso_3_code": "kuy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "934", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umpila", "iso_1_code": null, "iso_3_code": "ump", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "935", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "932", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alngith", "iso_1_code": null, "iso_3_code": "aid", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "937", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Atampaya", "iso_1_code": null, "iso_3_code": "amz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "938", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Angkamuthi", "iso_1_code": null, "iso_3_code": "avm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "939", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Anguthimri", "iso_1_code": null, "iso_3_code": "awg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndra\u2019ngith", "iso_1_code": null, "iso_3_code": "dgt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Adithinngithigh", "iso_1_code": null, "iso_3_code": "dth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awngthim", "iso_1_code": null, "iso_3_code": "gwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "943", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Leningitij", "iso_1_code": null, "iso_3_code": "lnj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arritinngithigh", "iso_1_code": null, "iso_3_code": "rrt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "945", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tjungundji", "iso_1_code": null, "iso_3_code": "tjj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "946", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uradhi", "iso_1_code": null, "iso_3_code": "urf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "947", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mpalitjanh", "iso_1_code": null, "iso_3_code": "xpj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "936", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rarmul Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aghu-Tharnggala", "iso_1_code": null, "iso_3_code": "gtu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "950", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ikaranggal", "iso_1_code": null, "iso_3_code": "ikr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "951", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thaypan", "iso_1_code": null, "iso_3_code": "typ", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "952", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barbaram", "iso_1_code": null, "iso_3_code": "vmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "954", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wamin", "iso_1_code": null, "iso_3_code": "wmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "955", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "953", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwestern Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kok-Nar", "iso_1_code": null, "iso_3_code": "gko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "957", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koko Babangk", "iso_1_code": null, "iso_3_code": "okg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "958", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuuk-Yak", "iso_1_code": null, "iso_3_code": "uky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "956", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umbindhamuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Umbindhamu", "iso_1_code": null, "iso_3_code": "umd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "961", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Pama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kuuk Thayorre", "iso_1_code": null, "iso_3_code": "thd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "963", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "962", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yir Yoront", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yirrk-Mel", "iso_1_code": null, "iso_3_code": "yrm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "965", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yir-Yoront", "iso_1_code": null, "iso_3_code": "yyr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "966", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "964", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "887", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South-West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kalaamaya", "iso_1_code": null, "iso_3_code": "lkm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "968", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Walangama", "iso_1_code": null, "iso_3_code": "nlw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Coastal Ngayarda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Djiwarli", "iso_1_code": null, "iso_3_code": "dze", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "971", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thiin", "iso_1_code": null, "iso_3_code": "iin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "972", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nhuwala", "iso_1_code": null, "iso_3_code": "nhf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "973", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngarluma", "iso_1_code": null, "iso_3_code": "nrl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kariyarra", "iso_1_code": null, "iso_3_code": "vka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "975", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurrama", "iso_1_code": null, "iso_3_code": "vku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "976", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Martuyhunira", "iso_1_code": null, "iso_3_code": "vma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "977", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yindjibarndi", "iso_1_code": null, "iso_3_code": "yij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "978", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dhalandji", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dhalandji", "iso_1_code": null, "iso_3_code": "dhl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "980", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pinigura", "iso_1_code": null, "iso_3_code": "pnv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "981", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "979", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inland Ngayarda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dhargari", "iso_1_code": null, "iso_3_code": "dhr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "983", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyamal", "iso_1_code": null, "iso_3_code": "nly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngarla", "iso_1_code": null, "iso_3_code": "nrk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "985", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Banyjima", "iso_1_code": null, "iso_3_code": "pnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "986", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tjurruru", "iso_1_code": null, "iso_3_code": "tju", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "987", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wariyangga", "iso_1_code": null, "iso_3_code": "wri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "988", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yinhawangka", "iso_1_code": null, "iso_3_code": "ywg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "982", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanyara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bayungu", "iso_1_code": null, "iso_3_code": "bxj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "991", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burduna", "iso_1_code": null, "iso_3_code": "bxn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malgana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Malgana", "iso_1_code": null, "iso_3_code": "vml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "993", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangala", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mangala", "iso_1_code": null, "iso_3_code": "mem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "996", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "995", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marngu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karajarri", "iso_1_code": null, "iso_3_code": "gbd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "998", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyangumarta", "iso_1_code": null, "iso_3_code": "nna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "999", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "997", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mirning", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mirning", "iso_1_code": null, "iso_3_code": "gmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1001", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalarko", "iso_1_code": null, "iso_3_code": "kba", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1002", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngadjunmaya", "iso_1_code": null, "iso_3_code": "nju", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1003", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngarga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Warlpiri", "iso_1_code": null, "iso_3_code": "wbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1005", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Warlmanpa", "iso_1_code": null, "iso_3_code": "wrl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1006", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngumbin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jaru", "iso_1_code": null, "iso_3_code": "ddj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1008", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mudburra", "iso_1_code": null, "iso_3_code": "dmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gurindji", "iso_1_code": null, "iso_3_code": "gue", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngarinyman", "iso_1_code": null, "iso_3_code": "nbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1011", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngardi", "iso_1_code": null, "iso_3_code": "rxd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1012", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Walmajarri", "iso_1_code": null, "iso_3_code": "wmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1013", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "1007", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyungar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyungar", "iso_1_code": null, "iso_3_code": "nys", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pinjarup", "iso_1_code": null, "iso_3_code": "pnj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1016", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wardandi", "iso_1_code": null, "iso_3_code": "wxw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bibbulman", "iso_1_code": null, "iso_3_code": "xbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Goreng", "iso_1_code": null, "iso_3_code": "xgg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1019", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nganakarti", "iso_1_code": null, "iso_3_code": "xnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minang", "iso_1_code": null, "iso_3_code": "xrg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1021", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wajuk", "iso_1_code": null, "iso_3_code": "xwj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1022", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1014", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wadjari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Badimaya", "iso_1_code": null, "iso_3_code": "bia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1024", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wajarri", "iso_1_code": null, "iso_3_code": "wbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1025", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1023", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wati", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Antikarinya", "iso_1_code": null, "iso_3_code": "ant", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1027", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yankunytjatjara", "iso_1_code": null, "iso_3_code": "kdd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1028", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kokata", "iso_1_code": null, "iso_3_code": "ktd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1029", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kukatja", "iso_1_code": null, "iso_3_code": "kux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1030", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Martu Wangka", "iso_1_code": null, "iso_3_code": "mpj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngaanyatjarra", "iso_1_code": null, "iso_3_code": "ntj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pintupi-Luritja", "iso_1_code": null, "iso_3_code": "piu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1033", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pitjantjatjara", "iso_1_code": null, "iso_3_code": "pjt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1034", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pintiini", "iso_1_code": null, "iso_3_code": "pti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tjupany", "iso_1_code": null, "iso_3_code": "tjp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warnman", "iso_1_code": null, "iso_3_code": "wbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1037", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyiyaparli", "iso_1_code": null, "iso_3_code": "xny", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1026", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yinggarda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nhanda", "iso_1_code": null, "iso_3_code": "nha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1040", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yinggarda", "iso_1_code": null, "iso_3_code": "yia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1041", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yura", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Adnyamathanha", "iso_1_code": null, "iso_3_code": "adt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1043", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Barngarla", "iso_1_code": null, "iso_3_code": "bjb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1044", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guyani", "iso_1_code": null, "iso_3_code": "gvy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1045", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngadjuri", "iso_1_code": null, "iso_3_code": "jui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Narungga", "iso_1_code": null, "iso_3_code": "nnr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1047", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nugunu", "iso_1_code": null, "iso_3_code": "nnv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1048", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nauo", "iso_1_code": null, "iso_3_code": "nwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1049", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wirangu", "iso_1_code": null, "iso_3_code": "wgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaurna", "iso_1_code": null, "iso_3_code": "zku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1051", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1042", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "967", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wagaya-Warluwaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Warluwara-Thawa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wagaya", "iso_1_code": null, "iso_3_code": "wga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waluwarra", "iso_1_code": null, "iso_3_code": "wrb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yindjilandji", "iso_1_code": null, "iso_3_code": "yil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1056", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1052", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waka-Kabic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gabi-Gabi", "iso_1_code": null, "iso_3_code": "gbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1058", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Batjala", "iso_1_code": null, "iso_3_code": "xby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1059", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kingkel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bayali", "iso_1_code": null, "iso_3_code": "bjy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1061", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wakawaka", "iso_1_code": null, "iso_3_code": "wkw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1063", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wuliwuli", "iso_1_code": null, "iso_3_code": "wlu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1064", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1062", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Than", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gureng Gureng", "iso_1_code": null, "iso_3_code": "gnr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1066", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1065", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1057", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warumungic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Warumungu", "iso_1_code": null, "iso_3_code": "wrm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1068", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1067", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wiradhuric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gamilaraay", "iso_1_code": null, "iso_3_code": "kld", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1070", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wiradjuri", "iso_1_code": null, "iso_3_code": "wrh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1071", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wangaaybuwan-Ngiyambaa", "iso_1_code": null, "iso_3_code": "wyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1072", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1069", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Worimi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awabakal", "iso_1_code": null, "iso_3_code": "awk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1074", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Worimi", "iso_1_code": null, "iso_3_code": "kda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1075", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1073", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yalandyic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Djangun", "iso_1_code": null, "iso_3_code": "djf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1077", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuku-Yalanji", "iso_1_code": null, "iso_3_code": "gvn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1078", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Muluridyi", "iso_1_code": null, "iso_3_code": "vmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1076", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yanyuwan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yanyuwa", "iso_1_code": null, "iso_3_code": "jao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1081", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1080", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yarli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wadikali", "iso_1_code": null, "iso_3_code": "wdk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1083", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malyangapa", "iso_1_code": null, "iso_3_code": "yga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yardliyawarra", "iso_1_code": null, "iso_3_code": "yxl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1085", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1082", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yidinic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Djabugay", "iso_1_code": null, "iso_3_code": "dyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1087", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yidiny", "iso_1_code": null, "iso_3_code": "yii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1088", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1086", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yotayotic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yorta Yorta", "iso_1_code": null, "iso_3_code": "xyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yabula Yabula", "iso_1_code": null, "iso_3_code": "yxy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1091", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1089", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yugambal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yugambal", "iso_1_code": null, "iso_3_code": "yub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1093", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1092", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yuin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dhurga", "iso_1_code": null, "iso_3_code": "dhu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1095", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dharawal", "iso_1_code": null, "iso_3_code": "tbh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Birrpayi", "iso_1_code": null, "iso_3_code": "xbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1097", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Darkinyung", "iso_1_code": null, "iso_3_code": "xda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1098", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dharuk", "iso_1_code": null, "iso_3_code": "xdk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1099", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jaitmatang", "iso_1_code": null, "iso_3_code": "xjt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngarigu", "iso_1_code": null, "iso_3_code": "xni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gundungurra", "iso_1_code": null, "iso_3_code": "xrd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1102", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thawa", "iso_1_code": null, "iso_3_code": "xtv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1103", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngunawal", "iso_1_code": null, "iso_3_code": "xul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yuulngu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Golpa", "iso_1_code": null, "iso_3_code": "lja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dhangu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dhangu-Djangu", "iso_1_code": null, "iso_3_code": "dhg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1108", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yan-nhangu", "iso_1_code": null, "iso_3_code": "jay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1109", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dhuwal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dayi", "iso_1_code": null, "iso_3_code": "dax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1111", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Djambarrpuyngu", "iso_1_code": null, "iso_3_code": "djr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1112", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dhuwal", "iso_1_code": null, "iso_3_code": "dwu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1113", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dhuwaya", "iso_1_code": null, "iso_3_code": "dwy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gumatj", "iso_1_code": null, "iso_3_code": "gnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1115", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gupapuyngu", "iso_1_code": null, "iso_3_code": "guf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1116", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ritharrngu", "iso_1_code": null, "iso_3_code": "rit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Djinang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Djinba", "iso_1_code": null, "iso_3_code": "djb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Djinang", "iso_1_code": null, "iso_3_code": "dji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tangic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ganggalida", "iso_1_code": null, "iso_3_code": "gcd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayardild", "iso_1_code": null, "iso_3_code": "gyd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyangga", "iso_1_code": null, "iso_3_code": "nny", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minkin", "iso_1_code": null, "iso_3_code": "xxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1125", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiwian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tiwi", "iso_1_code": null, "iso_3_code": "tiw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umbugarla-Ngumbur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngurmbur", "iso_1_code": null, "iso_3_code": "nrx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umbugarla", "iso_1_code": null, "iso_3_code": "umr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Barkly", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jingulu", "iso_1_code": null, "iso_3_code": "jig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gudanji", "iso_1_code": null, "iso_3_code": "nji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1133", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wambaya", "iso_1_code": null, "iso_3_code": "wmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Worrorran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gambera", "iso_1_code": null, "iso_3_code": "gma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1136", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwini", "iso_1_code": null, "iso_3_code": "gww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngarinyin", "iso_1_code": null, "iso_3_code": "ung", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miwa", "iso_1_code": null, "iso_3_code": "vmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1139", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wilawila", "iso_1_code": null, "iso_3_code": "wil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wunambal", "iso_1_code": null, "iso_3_code": "wub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1141", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngarinyinic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Andajin", "iso_1_code": null, "iso_3_code": "ajn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1143", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Worrorran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yawijibaya", "iso_1_code": null, "iso_3_code": "jbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1145", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Worrorra", "iso_1_code": null, "iso_3_code": "wro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unggumi", "iso_1_code": null, "iso_3_code": "xgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1147", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umiida", "iso_1_code": null, "iso_3_code": "xud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unggaranggu", "iso_1_code": null, "iso_3_code": "xun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1149", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1144", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yanyi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Waanyi", "iso_1_code": null, "iso_3_code": "wny", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Garrwa", "iso_1_code": null, "iso_3_code": "wrk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1152", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "1150", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yiwaidjan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amaragic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amurdak", "iso_1_code": null, "iso_3_code": "amg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1154", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Margic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Margu", "iso_1_code": null, "iso_3_code": "mhg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1156", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yiwaidjic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Iwaidja", "iso_1_code": null, "iso_3_code": "ibd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Garig-Ilgar", "iso_1_code": null, "iso_3_code": "ilg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1160", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maung", "iso_1_code": null, "iso_3_code": "mph", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1161", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manangkari", "iso_1_code": null, "iso_3_code": "znk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1153", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "656", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Austro-Asiatic.json b/data/Austro-Asiatic.json index e59c433304ee22c1118b59bf2d3c257af514ea7b..68513102e28fb22ec5bbb702faf280872063161e 100644 --- a/data/Austro-Asiatic.json +++ b/data/Austro-Asiatic.json @@ -2,3264 +2,3158 @@ "name": "Austro-Asiatic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mon-Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jah Hut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jah Hut", "iso_1_code": null, "iso_3_code": "jah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1167", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1166", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chewong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cheq Wong", "iso_1_code": null, "iso_3_code": "cwg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1170", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Batek", "iso_1_code": null, "iso_3_code": "btq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jehai", "iso_1_code": null, "iso_3_code": "jhi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1173", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minriq", "iso_1_code": null, "iso_3_code": "mnq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mintil", "iso_1_code": null, "iso_3_code": "mzt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1175", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tonga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ten\u2019edn", "iso_1_code": null, "iso_3_code": "tnz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1176", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kintaq", "iso_1_code": null, "iso_3_code": "knq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1179", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kensiu", "iso_1_code": null, "iso_3_code": "kns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1180", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Senoic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lanoh", "iso_1_code": null, "iso_3_code": "lnh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1182", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sab\u00fcm", "iso_1_code": null, "iso_3_code": "sbo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semai", "iso_1_code": null, "iso_3_code": "sea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semnam", "iso_1_code": null, "iso_3_code": "ssm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Temiar", "iso_1_code": null, "iso_3_code": "tea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1186", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mah Meri", "iso_1_code": null, "iso_3_code": "mhe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semelai", "iso_1_code": null, "iso_3_code": "sza", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semaq Beri", "iso_1_code": null, "iso_3_code": "szc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Temoq", "iso_1_code": null, "iso_3_code": "tmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Mon-Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Alak", "iso_1_code": null, "iso_3_code": "alk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bahnar", "iso_1_code": null, "iso_3_code": "bdq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1196", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Romam", "iso_1_code": null, "iso_3_code": "rmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tampuan", "iso_1_code": null, "iso_3_code": "tpu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cua", "iso_1_code": null, "iso_3_code": "cua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Katua", "iso_1_code": null, "iso_3_code": "kta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1202", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kachok", "iso_1_code": null, "iso_3_code": "xkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1203", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayong", "iso_1_code": null, "iso_3_code": "kxy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Takua", "iso_1_code": null, "iso_3_code": "tkz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Trieng", "iso_1_code": null, "iso_3_code": "stg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Talieng", "iso_1_code": null, "iso_3_code": "tdf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Halang Doan", "iso_1_code": null, "iso_3_code": "hld", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jeh-Halang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Halang", "iso_1_code": null, "iso_3_code": "hal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1213", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jeh", "iso_1_code": null, "iso_3_code": "jeh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rengao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rengao", "iso_1_code": null, "iso_3_code": "ren", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1216", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sedang-Todrah", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sedang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hre", "iso_1_code": null, "iso_3_code": "hre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1219", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sedang", "iso_1_code": null, "iso_3_code": "sed", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1220", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Todrah-Monom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Monom", "iso_1_code": null, "iso_3_code": "moo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Todrah", "iso_1_code": null, "iso_3_code": "tdr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1221", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1217", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Stieng, Budeh", "iso_1_code": null, "iso_3_code": "stt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sre-Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mnong, Eastern", "iso_1_code": null, "iso_3_code": "mng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1229", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern-Central Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mnong, Central", "iso_1_code": null, "iso_3_code": "cmo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1231", + "native_tokenizers": [], "scripts": [ "Latn", "Khmr" - ], - "own_tokenizer": false + ] }, { "name": "Mnong, Southern", "iso_1_code": null, "iso_3_code": "mnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1232", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kraol", "iso_1_code": null, "iso_3_code": "rka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1233", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sre", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maa", "iso_1_code": null, "iso_3_code": "cma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koho", "iso_1_code": null, "iso_3_code": "kpm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1234", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Stieng-Chrau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chrau", "iso_1_code": null, "iso_3_code": "crw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1238", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mel-Khaonh", "iso_1_code": null, "iso_3_code": "hkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Stieng, Bulo", "iso_1_code": null, "iso_3_code": "sti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1224", - "scripts": [], - "own_tokenizer": false - }, - { + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, + "node_i": "1224", + "native_tokenizers": [], + "scripts": [] + }, + { "name": "West Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lavi", "iso_1_code": null, "iso_3_code": "lvi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1242", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Brao-Kravet", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Brao", "iso_1_code": null, "iso_3_code": "brb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Krung", "iso_1_code": null, "iso_3_code": "krr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kavet", "iso_1_code": null, "iso_3_code": "krv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sou", "iso_1_code": null, "iso_3_code": "sqq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laven", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Laven", "iso_1_code": null, "iso_3_code": "lbo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyaheun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyaheun", "iso_1_code": null, "iso_3_code": "nev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oi-The", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oy", "iso_1_code": null, "iso_3_code": "oyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sapuan", "iso_1_code": null, "iso_3_code": "spu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1252", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1241", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1193", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Katuic", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "1193", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Katuic", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Central Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ta\u2019oih", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ir", "iso_1_code": null, "iso_3_code": "irr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ong", "iso_1_code": null, "iso_3_code": "oog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1259", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ta\u2019oih, Upper", "iso_1_code": null, "iso_3_code": "tth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ta\u2019oih, Lower", "iso_1_code": null, "iso_3_code": "tto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Katu-Pacoh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Katu, Eastern", "iso_1_code": null, "iso_3_code": "ktv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1264", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Katu, Western", "iso_1_code": null, "iso_3_code": "kuf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1265", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pacoh", "iso_1_code": null, "iso_3_code": "pac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phuong", "iso_1_code": null, "iso_3_code": "phg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tareng", "iso_1_code": null, "iso_3_code": "tgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1263", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngeq-Nkriang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kriang", "iso_1_code": null, "iso_3_code": "ngt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1269", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bru, Eastern", "iso_1_code": null, "iso_3_code": "bru", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1273", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bru, Western", "iso_1_code": null, "iso_3_code": "brv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Katang, Northern", "iso_1_code": null, "iso_3_code": "ncq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1275", + "native_tokenizers": [], "scripts": [ "Laoo" - ], - "own_tokenizer": false + ] }, { "name": "Katang, Southern", "iso_1_code": null, "iso_3_code": "sct", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "So", "iso_1_code": null, "iso_3_code": "sss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1277", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khua", "iso_1_code": null, "iso_3_code": "xhv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kuay", "iso_1_code": null, "iso_3_code": "kdt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyeu", "iso_1_code": null, "iso_3_code": "nyl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1279", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1271", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1255", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khmer", "iso_1_code": "km", "iso_3_code": "khm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1283", + "native_tokenizers": [], "scripts": [ "Khmr" - ], - "own_tokenizer": false + ] }, { "name": "Khmer, Northern", "iso_1_code": null, "iso_3_code": "kxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1284", + "native_tokenizers": [], "scripts": [ "Thai" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "1282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pearic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pear", "iso_1_code": null, "iso_3_code": "pcb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1287", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chong", "iso_1_code": null, "iso_3_code": "cog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1290", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chung", "iso_1_code": null, "iso_3_code": "scq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1291", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samre", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Somray", "iso_1_code": null, "iso_3_code": "smu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samre", "iso_1_code": null, "iso_3_code": "sxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1294", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suoy", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Su\u2019ung", "iso_1_code": null, "iso_3_code": "syo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1288", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1285", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Monic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mon", "iso_1_code": null, "iso_3_code": "mnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1298", + "native_tokenizers": [], "scripts": [ "Mymr" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "1297", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nicobar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Car", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nicobarese, Car", "iso_1_code": null, "iso_3_code": "caq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1301", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chowra-Teressa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chaura", "iso_1_code": null, "iso_3_code": "crv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1303", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teressa", "iso_1_code": null, "iso_3_code": "tef", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Great Nicobar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nicobarese, Southern", "iso_1_code": null, "iso_3_code": "nik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nancowry", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nicobarese, Central", "iso_1_code": null, "iso_3_code": "ncb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1308", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1307", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shom Peng", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Shom Peng", "iso_1_code": null, "iso_3_code": "sii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1310", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1299", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Northern Mon-Khmer", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "1299", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Northern Mon-Khmer", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Khasian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "War-Jaintia", "iso_1_code": null, "iso_3_code": "aml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khasi", "iso_1_code": null, "iso_3_code": "kha", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1314", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lyngngam", "iso_1_code": null, "iso_3_code": "lyg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1315", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pnar", "iso_1_code": null, "iso_3_code": "pbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khmuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khao", "iso_1_code": null, "iso_3_code": "xao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mal-Khmu\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khmu\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khuen", "iso_1_code": null, "iso_3_code": "khf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khmu", "iso_1_code": null, "iso_3_code": "kjg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "O\u2019du", "iso_1_code": null, "iso_3_code": "tyh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mal-Prai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mal", "iso_1_code": null, "iso_3_code": "mlf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Prai", "iso_1_code": null, "iso_3_code": "prt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mlabri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mlabri", "iso_1_code": null, "iso_3_code": "mra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xinh Mul", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Phong-Kniang", "iso_1_code": null, "iso_3_code": "pnx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puoc", "iso_1_code": null, "iso_3_code": "puo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1330", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1317", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mang", "iso_1_code": null, "iso_3_code": "zng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Angkuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hu", "iso_1_code": null, "iso_3_code": "huo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kon Keu", "iso_1_code": null, "iso_3_code": "kkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Man Met", "iso_1_code": null, "iso_3_code": "mml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mok", "iso_1_code": null, "iso_3_code": "mqt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samtao", "iso_1_code": null, "iso_3_code": "stu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai Loi", "iso_1_code": null, "iso_3_code": "tlq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muak Sa-aak", "iso_1_code": null, "iso_3_code": "ukk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1344", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "U", "iso_1_code": null, "iso_3_code": "uuu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1345", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiorr", "iso_1_code": null, "iso_3_code": "xko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bit-Khang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bit", "iso_1_code": null, "iso_3_code": "bgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bumang", "iso_1_code": null, "iso_3_code": "bvp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kh\u00e1ng", "iso_1_code": null, "iso_3_code": "kjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lametic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Con", "iso_1_code": null, "iso_3_code": "cno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rmeet", "iso_1_code": null, "iso_3_code": "lbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bulang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Blang", "iso_1_code": null, "iso_3_code": "blr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lawa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lawa, Western", "iso_1_code": null, "iso_3_code": "lcp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1358", + "native_tokenizers": [], "scripts": [ "Thai" - ], - "own_tokenizer": false + ] }, { "name": "Lawa, Eastern", "iso_1_code": null, "iso_3_code": "lwl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1359", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Wa, Parauk", "iso_1_code": null, "iso_3_code": "prk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awa", "iso_1_code": null, "iso_3_code": "vwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wa, Vo", "iso_1_code": null, "iso_3_code": "wbm", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1363", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1336", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Danau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Danau", "iso_1_code": null, "iso_3_code": "dnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palaung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Palaung, Ruching", "iso_1_code": null, "iso_3_code": "pce", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palaung, Shwe", "iso_1_code": null, "iso_3_code": "pll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1369", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palaung, Rumai", "iso_1_code": null, "iso_3_code": "rbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1370", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1367", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Riang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Riang Lang", "iso_1_code": null, "iso_3_code": "ril", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1372", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Riang Lai", "iso_1_code": null, "iso_3_code": "yin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1373", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1311", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palyu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bugan", "iso_1_code": null, "iso_3_code": "bbh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1375", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bolyu", "iso_1_code": null, "iso_3_code": "ply", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1376", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1374", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Monic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyahkur", "iso_1_code": null, "iso_3_code": "cbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1378", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1377", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kemiehua", "iso_1_code": null, "iso_3_code": "kfj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuanhua", "iso_1_code": null, "iso_3_code": "xnh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1379", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Viet-Muong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Chut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arem", "iso_1_code": null, "iso_3_code": "aem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1384", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maleng", "iso_1_code": null, "iso_3_code": "pkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1385", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chut", "iso_1_code": null, "iso_3_code": "scb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1383", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cuoi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hung", "iso_1_code": null, "iso_3_code": "hnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tho", "iso_1_code": null, "iso_3_code": "tou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1389", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1387", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bo", "iso_1_code": null, "iso_3_code": "bgl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muong", "iso_1_code": null, "iso_3_code": "mtq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngu\u00f4n", "iso_1_code": null, "iso_3_code": "nuo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1393", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1390", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thavung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aheu", "iso_1_code": null, "iso_3_code": "thm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1395", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1394", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vietnamese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Vietnamese", "iso_1_code": "vi", "iso_3_code": "vie", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1397", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1396", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1164", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Munda", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "1164", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Munda", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "North Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kherwari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Agariya", "iso_1_code": null, "iso_3_code": "agi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1401", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bijori", "iso_1_code": null, "iso_3_code": "bix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kodaku", "iso_1_code": null, "iso_3_code": "ksz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mundari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Asuri", "iso_1_code": null, "iso_3_code": "asr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Birhor", "iso_1_code": null, "iso_3_code": "biy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koda", "iso_1_code": null, "iso_3_code": "cdz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kol", "iso_1_code": null, "iso_3_code": "ekl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1408", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ho", "iso_1_code": null, "iso_3_code": "hoc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1409", + "native_tokenizers": [], "scripts": [ "Latn", "Wara" - ], - "own_tokenizer": false + ] }, { "name": "Korwa", "iso_1_code": null, "iso_3_code": "kfp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mundari", "iso_1_code": null, "iso_3_code": "unr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1411", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munda", "iso_1_code": null, "iso_3_code": "unx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1412", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1404", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Santali", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "1404", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Santali", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Mahali", "iso_1_code": null, "iso_3_code": "mjx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1414", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Santhali", "iso_1_code": null, "iso_3_code": "sat", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1415", + "native_tokenizers": [], "scripts": [ "Latn", "Olck" - ], - "own_tokenizer": false + ] }, { "name": "Turi", "iso_1_code": null, "iso_3_code": "trd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1416", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1400", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Korku", "iso_1_code": null, "iso_3_code": "kfq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kharia-Juang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Juang", "iso_1_code": null, "iso_3_code": "jun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1421", + "native_tokenizers": [], "scripts": [ "Orya" - ], - "own_tokenizer": false + ] }, { "name": "Kharia", "iso_1_code": null, "iso_3_code": "khr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1422", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koraput Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gutob-Remo-Geta\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Geta\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gata\u2019", "iso_1_code": null, "iso_3_code": "gaq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gutob-Remo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bondo", "iso_1_code": null, "iso_3_code": "bfw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gadaba, Bodo", "iso_1_code": null, "iso_3_code": "gbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sora-Juray-Gorum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gorum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Parenga", "iso_1_code": null, "iso_3_code": "pcj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1431", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sora-Juray", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Juray", "iso_1_code": null, "iso_3_code": "juy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sora", "iso_1_code": null, "iso_3_code": "srb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1430", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Austronesian.json b/data/Austronesian.json index dade162d9874b0c2defa419b61ed210a6f85aecc..61450cc830364c5c0375c38b5482c01311f33d65 100644 --- a/data/Austronesian.json +++ b/data/Austronesian.json @@ -2,35727 +2,24499 @@ "name": "Austronesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Atayalic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Atayal", "iso_1_code": null, "iso_3_code": "tay", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1438", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sediq", "iso_1_code": null, "iso_3_code": "trv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1439", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bunun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bunun", "iso_1_code": null, "iso_3_code": "bnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Formosan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Amis", "iso_1_code": null, "iso_3_code": "ami", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1444", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sakizaya", "iso_1_code": null, "iso_3_code": "szy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1445", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Basay", "iso_1_code": null, "iso_3_code": "byq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kavalan", "iso_1_code": null, "iso_3_code": "ckv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1448", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1446", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Siraya", "iso_1_code": null, "iso_3_code": "fos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1450", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taivoan", "iso_1_code": null, "iso_3_code": "tvx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malayo-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Agta, Villa Viciosa", "iso_1_code": null, "iso_3_code": "dyg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1453", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bali-Sasak-Sumbawa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bali", "iso_1_code": null, "iso_3_code": "ban", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1455", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sasak-Sumbawa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sasak", "iso_1_code": null, "iso_3_code": "sas", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1457", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sumbawa", "iso_1_code": null, "iso_3_code": "smw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bashiic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ivatan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ibatan", "iso_1_code": null, "iso_3_code": "ivb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1461", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ivatan", "iso_1_code": null, "iso_3_code": "ivv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1462", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yami", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yami", "iso_1_code": null, "iso_3_code": "tao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bilic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bagobo-Klata", "iso_1_code": null, "iso_3_code": "bgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teduray", "iso_1_code": null, "iso_3_code": "tiy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1467", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Blaan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Blaan, Koronadal", "iso_1_code": null, "iso_3_code": "bpr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1469", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Blaan, Sarangani", "iso_1_code": null, "iso_3_code": "bps", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1470", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tboli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tboli", "iso_1_code": null, "iso_3_code": "tbl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1472", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1471", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Celebic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Saluan-Banggai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Banggai", "iso_1_code": null, "iso_3_code": "bgz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1477", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Balantak", "iso_1_code": null, "iso_3_code": "blz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1478", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Andio", "iso_1_code": null, "iso_3_code": "bzb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saluanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bobongko", "iso_1_code": null, "iso_3_code": "bgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saluan", "iso_1_code": null, "iso_3_code": "loe", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1483", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Batui", "iso_1_code": null, "iso_3_code": "zbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bungku-Tolaki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "East Coast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bungku", "iso_1_code": null, "iso_3_code": "bkz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bahonsuai", "iso_1_code": null, "iso_3_code": "bsu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1490", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wawonii", "iso_1_code": null, "iso_3_code": "wow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mori Bawah", "iso_1_code": null, "iso_3_code": "xmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulisusu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Taloki", "iso_1_code": null, "iso_3_code": "tlk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1494", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulisusu", "iso_1_code": null, "iso_3_code": "vkl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1495", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koroni", "iso_1_code": null, "iso_3_code": "xkq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1496", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Moronene", "iso_1_code": null, "iso_3_code": "mqn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1498", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1497", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1487", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Interior", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mori Atas", "iso_1_code": null, "iso_3_code": "mzq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1501", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Padoe", "iso_1_code": null, "iso_3_code": "pdo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tomadino", "iso_1_code": null, "iso_3_code": "tdi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1500", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Coast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tolaki", "iso_1_code": null, "iso_3_code": "lbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rahambuu", "iso_1_code": null, "iso_3_code": "raz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kodeoha", "iso_1_code": null, "iso_3_code": "vko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waru", "iso_1_code": null, "iso_3_code": "wru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1504", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1499", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1486", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muna-Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nuclear Muna-Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "East Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lasalimu", "iso_1_code": null, "iso_3_code": "llm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kumbewaha", "iso_1_code": null, "iso_3_code": "xks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cia-Cia", "iso_1_code": null, "iso_3_code": "cia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Busoa", "iso_1_code": null, "iso_3_code": "bup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1518", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kaimbulawa", "iso_1_code": null, "iso_3_code": "zka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Liabuku", "iso_1_code": null, "iso_3_code": "lix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muna", "iso_1_code": null, "iso_3_code": "mnb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1523", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pancana", "iso_1_code": null, "iso_3_code": "pnp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kioko", "iso_1_code": null, "iso_3_code": "ues", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1519", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tukangbesi-Bonerate", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tukang Besi South", "iso_1_code": null, "iso_3_code": "bhq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bonerate", "iso_1_code": null, "iso_3_code": "bna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tukang Besi North", "iso_1_code": null, "iso_3_code": "khc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1529", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1526", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1474", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaili-Pamona", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kaili", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Baras", "iso_1_code": null, "iso_3_code": "brs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tado", "iso_1_code": null, "iso_3_code": "klw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1534", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaili, Da\u2019a", "iso_1_code": null, "iso_3_code": "kzf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1535", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kaili, Ledo", "iso_1_code": null, "iso_3_code": "lew", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1536", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Moma", "iso_1_code": null, "iso_3_code": "myl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1537", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Topoiyo", "iso_1_code": null, "iso_3_code": "toy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1538", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sedoa", "iso_1_code": null, "iso_3_code": "tvw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaili, Unde", "iso_1_code": null, "iso_3_code": "unz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pamona", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Pamona", "iso_1_code": null, "iso_3_code": "pmf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1542", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tombelala", "iso_1_code": null, "iso_3_code": "ttp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rampi", "iso_1_code": null, "iso_3_code": "lje", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uma", "iso_1_code": null, "iso_3_code": "ppk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1546", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sarudu", "iso_1_code": null, "iso_3_code": "sdu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Badaic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Behoa", "iso_1_code": null, "iso_3_code": "bep", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bada", "iso_1_code": null, "iso_3_code": "bhz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1550", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Napu", "iso_1_code": null, "iso_3_code": "npy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1551", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1548", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1544", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tomini-Tolitoli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tolitoli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boano", "iso_1_code": null, "iso_3_code": "bzl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Totoli", "iso_1_code": null, "iso_3_code": "txe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1553", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tomini", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dondo", "iso_1_code": null, "iso_3_code": "dok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1558", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lauje", "iso_1_code": null, "iso_3_code": "law", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1559", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tomini", "iso_1_code": null, "iso_3_code": "txm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1560", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Balaesang", "iso_1_code": null, "iso_3_code": "bls", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dampelas", "iso_1_code": null, "iso_3_code": "dms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taje", "iso_1_code": null, "iso_3_code": "pee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1564", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tajio", "iso_1_code": null, "iso_3_code": "tdj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1565", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pendau", "iso_1_code": null, "iso_3_code": "ums", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1566", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1556", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wotu-Wolio", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wotu", "iso_1_code": null, "iso_3_code": "wtw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1568", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kalao", "iso_1_code": null, "iso_3_code": "kly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1570", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laiyolo", "iso_1_code": null, "iso_3_code": "lji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1571", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1569", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wolio-Kamaru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamaru", "iso_1_code": null, "iso_3_code": "kgx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1573", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wolio", "iso_1_code": null, "iso_3_code": "wlo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1574", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1572", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1567", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Luzon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Pampangan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kapampangan", "iso_1_code": null, "iso_3_code": "pam", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1577", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1576", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sambalic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ayta, Ambala", "iso_1_code": null, "iso_3_code": "abc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayta, Abellen", "iso_1_code": null, "iso_3_code": "abp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1580", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayta, Magbukun", "iso_1_code": null, "iso_3_code": "ayt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayta, Mag-Indi", "iso_1_code": null, "iso_3_code": "blx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1582", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sambal, Botolan", "iso_1_code": null, "iso_3_code": "sbl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1583", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ayta, Mag-antsi", "iso_1_code": null, "iso_3_code": "sgb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1584", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bolinao", "iso_1_code": null, "iso_3_code": "smk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1585", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sambal", "iso_1_code": null, "iso_3_code": "xsb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1586", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1578", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sinauna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dumagat, Remontado", "iso_1_code": null, "iso_3_code": "agv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1587", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central-Eastern Malayo-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barakai", "iso_1_code": null, "iso_3_code": "baj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gwatlelir", "iso_1_code": null, "iso_3_code": "bay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koba", "iso_1_code": null, "iso_3_code": "kpd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1593", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dobel", "iso_1_code": null, "iso_3_code": "kvo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kompane", "iso_1_code": null, "iso_3_code": "kvp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1595", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kola", "iso_1_code": null, "iso_3_code": "kvv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1596", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karey", "iso_1_code": null, "iso_3_code": "kyd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1597", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lola", "iso_1_code": null, "iso_3_code": "lcd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lorang", "iso_1_code": null, "iso_3_code": "lrn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mariri", "iso_1_code": null, "iso_3_code": "mqi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarangan, East", "iso_1_code": null, "iso_3_code": "tre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1601", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarangan, West", "iso_1_code": null, "iso_3_code": "txn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1602", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ujir", "iso_1_code": null, "iso_3_code": "udj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manombai", "iso_1_code": null, "iso_3_code": "woo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Babar, North", "iso_1_code": null, "iso_3_code": "bcd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dawera-Daweloor", "iso_1_code": null, "iso_3_code": "ddw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1608", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dai", "iso_1_code": null, "iso_3_code": "dij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Masela-South Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Masela, West", "iso_1_code": null, "iso_3_code": "mss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masela, Central", "iso_1_code": null, "iso_3_code": "mxz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1613", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Serili", "iso_1_code": null, "iso_3_code": "sve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1614", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Babar, Southeast", "iso_1_code": null, "iso_3_code": "vbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masela, East", "iso_1_code": null, "iso_3_code": "vme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1616", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1611", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Emplawas", "iso_1_code": null, "iso_3_code": "emw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Imroing", "iso_1_code": null, "iso_3_code": "imr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tela-Masbuar", "iso_1_code": null, "iso_3_code": "tvm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1617", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1610", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1605", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bima-Lembata", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Adonara", "iso_1_code": null, "iso_3_code": "adr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1622", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alor", "iso_1_code": null, "iso_3_code": "aol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1623", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bima", "iso_1_code": null, "iso_3_code": "bhp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1624", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ende", "iso_1_code": null, "iso_3_code": "end", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ile Ape", "iso_1_code": null, "iso_3_code": "ila", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kedang", "iso_1_code": null, "iso_3_code": "ksx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1627", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kepo\u2019", "iso_1_code": null, "iso_3_code": "kuk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Komodo", "iso_1_code": null, "iso_3_code": "kvh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1629", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Li\u2019o", "iso_1_code": null, "iso_3_code": "ljl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Painara", "iso_1_code": null, "iso_3_code": "lmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Labalekan-Mingar", "iso_1_code": null, "iso_3_code": "lmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamatuka", "iso_1_code": null, "iso_3_code": "lmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1633", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamalera", "iso_1_code": null, "iso_3_code": "lmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Levuka", "iso_1_code": null, "iso_3_code": "lvu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lewoeleng", "iso_1_code": null, "iso_3_code": "lwe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lewotobi", "iso_1_code": null, "iso_3_code": "lwt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1637", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manggarai", "iso_1_code": null, "iso_3_code": "mqy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1638", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngad\u2019a, Eastern", "iso_1_code": null, "iso_3_code": "nea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nage", "iso_1_code": null, "iso_3_code": "nxe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngad\u2019a", "iso_1_code": null, "iso_3_code": "nxg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palu\u2019e", "iso_1_code": null, "iso_3_code": "ple", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1642", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rembong", "iso_1_code": null, "iso_3_code": "reb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Riung", "iso_1_code": null, "iso_3_code": "riu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rajong", "iso_1_code": null, "iso_3_code": "rjg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rongga", "iso_1_code": null, "iso_3_code": "ror", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1646", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sika", "iso_1_code": null, "iso_3_code": "ski", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamaholot", "iso_1_code": null, "iso_3_code": "slp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "So\u2019a", "iso_1_code": null, "iso_3_code": "ssq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wae Rana", "iso_1_code": null, "iso_3_code": "wrx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1650", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ke\u2019o", "iso_1_code": null, "iso_3_code": "xxk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Maluku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ambelau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ambelau", "iso_1_code": null, "iso_3_code": "amv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lisela", "iso_1_code": null, "iso_3_code": "lcl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1656", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buru", "iso_1_code": null, "iso_3_code": "mhs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moksela", "iso_1_code": null, "iso_3_code": "vms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Manipa", "iso_1_code": null, "iso_3_code": "mqp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Banda-Geser", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda", "iso_1_code": null, "iso_3_code": "bnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Geser-Gorom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bati", "iso_1_code": null, "iso_3_code": "bvt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Geser-Gorom", "iso_1_code": null, "iso_3_code": "ges", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Watubela", "iso_1_code": null, "iso_3_code": "wah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bobot", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bobot", "iso_1_code": null, "iso_3_code": "bty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Seram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hoti", "iso_1_code": null, "iso_3_code": "hti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manusela-Seti", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Benggoi", "iso_1_code": null, "iso_3_code": "bgy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Huaulu", "iso_1_code": null, "iso_3_code": "hud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Salas", "iso_1_code": null, "iso_3_code": "sgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Liana-Seti", "iso_1_code": null, "iso_3_code": "ste", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sou Upaa", "iso_1_code": null, "iso_3_code": "wha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masiwang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Masiwang", "iso_1_code": null, "iso_3_code": "bnf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nunusaku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kayeli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayeli", "iso_1_code": null, "iso_3_code": "kzl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1682", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piru Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Haruku", "iso_1_code": null, "iso_3_code": "hrk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1684", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaibobo", "iso_1_code": null, "iso_3_code": "kzb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1686", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sepa", "iso_1_code": null, "iso_3_code": "spb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1687", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sou Nama", "iso_1_code": null, "iso_3_code": "tlt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1688", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seram Straits", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ambon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hitu", "iso_1_code": null, "iso_3_code": "htu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1691", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laha", "iso_1_code": null, "iso_3_code": "lhh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tulehu", "iso_1_code": null, "iso_3_code": "tlu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1690", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Solehua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Paulohi", "iso_1_code": null, "iso_3_code": "plh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uliase", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hatuhaha", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Elpaputi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amahai", "iso_1_code": null, "iso_3_code": "amq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nusa Laut", "iso_1_code": null, "iso_3_code": "nul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saparua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Latu", "iso_1_code": null, "iso_3_code": "ltu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1702", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saparua", "iso_1_code": null, "iso_3_code": "spr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamarian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamarian", "iso_1_code": null, "iso_3_code": "kzx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asilulu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asilulu", "iso_1_code": null, "iso_3_code": "asl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1708", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seit-Kaitetu", "iso_1_code": null, "iso_3_code": "hik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hoamoal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Larike-Wakasihu", "iso_1_code": null, "iso_3_code": "alo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1711", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Boano", "iso_1_code": null, "iso_3_code": "bzn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1712", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1710", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1683", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Three Rivers", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Yalahatan", "iso_1_code": null, "iso_3_code": "jal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1714", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amalumute", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northwest Seram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Luhu", "iso_1_code": null, "iso_3_code": "lcq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lisabata-Nuniali", "iso_1_code": null, "iso_3_code": "lcs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1718", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hulung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hulung", "iso_1_code": null, "iso_3_code": "huk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1720", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1719", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Loun", "iso_1_code": null, "iso_3_code": "lox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1722", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1721", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ulat Inai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Alune", "iso_1_code": null, "iso_3_code": "alp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1724", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naka\u2019ela", "iso_1_code": null, "iso_3_code": "nae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1725", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1723", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1715", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wemale", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wemale", "iso_1_code": null, "iso_3_code": "weo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1726", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sawai-Nuaulu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nuaulu, North", "iso_1_code": null, "iso_3_code": "nni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuaulu, South", "iso_1_code": null, "iso_3_code": "nxl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saleman", "iso_1_code": null, "iso_3_code": "sau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1728", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sula", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mangole", "iso_1_code": null, "iso_3_code": "mqc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sula", "iso_1_code": null, "iso_3_code": "szn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taliabo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kadai", "iso_1_code": null, "iso_3_code": "kzd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1736", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taliabu", "iso_1_code": null, "iso_3_code": "tlv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1737", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1652", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Malayo-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Admiralty Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Manus", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Andra-Hus", "iso_1_code": null, "iso_3_code": "anx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1744", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Elu", "iso_1_code": null, "iso_3_code": "elu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurti", "iso_1_code": null, "iso_3_code": "ktm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1746", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Koro", "iso_1_code": null, "iso_3_code": "kxr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Leipon", "iso_1_code": null, "iso_3_code": "lek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lele", "iso_1_code": null, "iso_3_code": "lle", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ponam", "iso_1_code": null, "iso_3_code": "ncc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nali", "iso_1_code": null, "iso_3_code": "nss", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1751", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kele", "iso_1_code": null, "iso_3_code": "sbc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Titan", "iso_1_code": null, "iso_3_code": "ttv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ere", "iso_1_code": null, "iso_3_code": "twp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1743", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mokoreng-Loniu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Loniu", "iso_1_code": null, "iso_3_code": "los", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Idio", "iso_1_code": null, "iso_3_code": "mft", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1755", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bipi", "iso_1_code": null, "iso_3_code": "biq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1759", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Likum", "iso_1_code": null, "iso_3_code": "lib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1760", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyindrou", "iso_1_code": null, "iso_3_code": "lid", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1761", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Hermit", "iso_1_code": null, "iso_3_code": "llf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1762", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mondropolon", "iso_1_code": null, "iso_3_code": "npn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tulu-Bohuai", "iso_1_code": null, "iso_3_code": "rak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sori-Harengan", "iso_1_code": null, "iso_3_code": "sbh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1765", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khehek", "iso_1_code": null, "iso_3_code": "tlx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1766", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1758", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pak-Tong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pak-Tong", "iso_1_code": null, "iso_3_code": "pkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1768", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1767", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeast Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Paluai", "iso_1_code": null, "iso_3_code": "blq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1770", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lenkau", "iso_1_code": null, "iso_3_code": "ler", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1771", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lou", "iso_1_code": null, "iso_3_code": "loj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Neherneh", "iso_1_code": null, "iso_3_code": "ncn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1773", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Penchal", "iso_1_code": null, "iso_3_code": "pek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1774", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kaniet", "iso_1_code": null, "iso_3_code": "ktk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seimat", "iso_1_code": null, "iso_3_code": "ssg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1777", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wuvulu-Aua", "iso_1_code": null, "iso_3_code": "wuv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1778", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central-Eastern Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Remote Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central Pacific", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Fijian-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Fijian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Fijian", "iso_1_code": "fj", "iso_3_code": "fij", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1784", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gone Dau", "iso_1_code": null, "iso_3_code": "goo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lauan", "iso_1_code": null, "iso_3_code": "llx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1786", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lomaiviti", "iso_1_code": null, "iso_3_code": "lmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rapa", "iso_1_code": null, "iso_3_code": "ray", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1792", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marquesic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hawaiian", "iso_1_code": null, "iso_3_code": "haw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1794", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Marquesan, South", "iso_1_code": null, "iso_3_code": "mqm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marquesan, North", "iso_1_code": null, "iso_3_code": "mrq", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1796", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mangareva", "iso_1_code": null, "iso_3_code": "mrv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1797", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1793", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tahitic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Austral", "iso_1_code": null, "iso_3_code": "aut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maori", "iso_1_code": "mi", "iso_3_code": "mri", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1800", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tuamotuan", "iso_1_code": null, "iso_3_code": "pmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Penrhyn", "iso_1_code": null, "iso_3_code": "pnh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cook Islands Maori", "iso_1_code": null, "iso_3_code": "rar", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1803", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Rakahanga-Manihiki", "iso_1_code": null, "iso_3_code": "rkh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1804", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moriori", "iso_1_code": null, "iso_3_code": "rrm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tahitian", "iso_1_code": "ty", "iso_3_code": "tah", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1806", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rapanui", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rapa Nui", "iso_1_code": null, "iso_3_code": "rap", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1808", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1807", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samoic-Outlier", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Uvean-Niuafo\u2019ou", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Niuatoputapu", "iso_1_code": null, "iso_3_code": "nkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1811", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Niuafo\u2019ou", "iso_1_code": null, "iso_3_code": "num", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wallisian", "iso_1_code": null, "iso_3_code": "wls", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1813", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ellicean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kapingamarangi", "iso_1_code": null, "iso_3_code": "kpg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1815", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Takuu", "iso_1_code": null, "iso_3_code": "nho", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1816", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nukuoro", "iso_1_code": null, "iso_3_code": "nkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1817", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nukumanu", "iso_1_code": null, "iso_3_code": "nuq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nukeria", "iso_1_code": null, "iso_3_code": "nur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ontong Java", "iso_1_code": null, "iso_3_code": "ojv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sikaiana", "iso_1_code": null, "iso_3_code": "sky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuvaluan", "iso_1_code": null, "iso_3_code": "tvl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1822", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1814", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Futunic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Anuta", "iso_1_code": null, "iso_3_code": "aud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Futuna, East", "iso_1_code": null, "iso_3_code": "fud", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1825", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Futuna-Aniwa", "iso_1_code": null, "iso_3_code": "fut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Emae", "iso_1_code": null, "iso_3_code": "mmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rennell-Bellona", "iso_1_code": null, "iso_3_code": "mnv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mele-Fila", "iso_1_code": null, "iso_3_code": "mxe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vaeakau-Taumako", "iso_1_code": null, "iso_3_code": "piv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tikopia", "iso_1_code": null, "iso_3_code": "tkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fagauvea", "iso_1_code": null, "iso_3_code": "uve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1832", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pukapuka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pukapuka", "iso_1_code": null, "iso_3_code": "pkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Samoan", "iso_1_code": "sm", "iso_3_code": "smo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1836", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tokelauan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tokelauan", "iso_1_code": null, "iso_3_code": "tkl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1838", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1809", - "scripts": [], - "own_tokenizer": false - } - ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, + "node_i": "1809", + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tongic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Niue", "iso_1_code": null, "iso_3_code": "niu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1840", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tongan", "iso_1_code": "to", "iso_3_code": "ton", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1841", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1788", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Fijian-Rotuman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rotuman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rotuman", "iso_1_code": null, "iso_3_code": "rtm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1844", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Fijian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Namosi-Naitasiri-Serua", "iso_1_code": null, "iso_3_code": "bwb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fijian, Western", "iso_1_code": null, "iso_3_code": "wyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loyalty Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Drehu", "iso_1_code": null, "iso_3_code": "dhv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1849", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Iaai", "iso_1_code": null, "iso_3_code": "iai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nengone", "iso_1_code": null, "iso_3_code": "nen", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Micronesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Micronesian Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ikiribati", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kiribati", "iso_1_code": null, "iso_3_code": "gil", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1855", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kusaiean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kosraean", "iso_1_code": null, "iso_3_code": "kos", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1857", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marshallese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Marshallese", "iso_1_code": "mh", "iso_3_code": "mah", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1859", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1858", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pohnpeic-Chuukic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chuukic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Carolinian", "iso_1_code": null, "iso_3_code": "cal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chuukese", "iso_1_code": null, "iso_3_code": "chk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1863", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mapia", "iso_1_code": null, "iso_3_code": "mpy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mortlockese", "iso_1_code": null, "iso_3_code": "mrl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1865", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Namonuito", "iso_1_code": null, "iso_3_code": "nmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1866", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "P\u00e1\u00e1fang", "iso_1_code": null, "iso_3_code": "pfa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puluwatese", "iso_1_code": null, "iso_3_code": "puw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sonsorolese", "iso_1_code": null, "iso_3_code": "sov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Satawalese", "iso_1_code": null, "iso_3_code": "stw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tobian", "iso_1_code": null, "iso_3_code": "tox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1871", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanapag", "iso_1_code": null, "iso_3_code": "tpv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1872", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ulithian", "iso_1_code": null, "iso_3_code": "uli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1873", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Woleaian", "iso_1_code": null, "iso_3_code": "woe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1861", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pohnpeic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mokilese", "iso_1_code": null, "iso_3_code": "mkj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pingelapese", "iso_1_code": null, "iso_3_code": "pif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1877", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pohnpeian", "iso_1_code": null, "iso_3_code": "pon", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1878", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1875", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1860", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1853", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nauruan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nauruan", "iso_1_code": "na", "iso_3_code": "nau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1880", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1879", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1852", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "New Caledonian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Haekic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Haeke", "iso_1_code": null, "iso_3_code": "aek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1883", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1882", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Haveke", "iso_1_code": null, "iso_3_code": "hvk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1885", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vamale", "iso_1_code": null, "iso_3_code": "mkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1886", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Cemuh\u00ee", "iso_1_code": null, "iso_3_code": "cam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1888", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paic\u00ee", "iso_1_code": null, "iso_3_code": "pri", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1889", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1887", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Extreme Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Caac", "iso_1_code": null, "iso_3_code": "msq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1891", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "N\u00eal\u00eamwa-Nixumwak", "iso_1_code": null, "iso_3_code": "nee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1892", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yuanga", "iso_1_code": null, "iso_3_code": "nua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1893", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyel\u00e2yu", "iso_1_code": null, "iso_3_code": "yly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1894", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1890", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pwaamei", "iso_1_code": null, "iso_3_code": "pme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1896", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pwapw\u00e2", "iso_1_code": null, "iso_3_code": "pop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1897", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hmwaveke", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bwatoo", "iso_1_code": null, "iso_3_code": "bwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hmwaveke", "iso_1_code": null, "iso_3_code": "mrk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waamwang", "iso_1_code": null, "iso_3_code": "wmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1901", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nemi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fw\u00e2i", "iso_1_code": null, "iso_3_code": "fwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1903", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jawe", "iso_1_code": null, "iso_3_code": "jaz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1904", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nemi", "iso_1_code": null, "iso_3_code": "nem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1905", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pije", "iso_1_code": null, "iso_3_code": "piz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1902", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1895", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1884", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Extreme Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Drubea", "iso_1_code": null, "iso_3_code": "duf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1909", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Num\u00e8\u00e8", "iso_1_code": null, "iso_3_code": "kdk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1910", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1908", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Wailic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aji\u00eb", "iso_1_code": null, "iso_3_code": "aji", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1913", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Arh\u00f6", "iso_1_code": null, "iso_3_code": "aok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1914", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arh\u00e2", "iso_1_code": null, "iso_3_code": "aqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Orowe", "iso_1_code": null, "iso_3_code": "bpk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Neku", "iso_1_code": null, "iso_3_code": "nek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1912", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xaracuu-Xaragure", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "X\u00e2r\u00e2c\u00f9\u00f9", "iso_1_code": null, "iso_3_code": "ane", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "X\u00e2r\u00e2gur\u00e8", "iso_1_code": null, "iso_3_code": "axx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zire-Tiri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "T\u00eer\u00ee", "iso_1_code": null, "iso_3_code": "cir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "S\u00eesh\u00eb\u00eb", "iso_1_code": null, "iso_3_code": "sih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1911", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1881", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North and Central Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Santo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "N\u2019kep", "iso_1_code": null, "iso_3_code": "sku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1926", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Butmas-Tur", "iso_1_code": null, "iso_3_code": "bnr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1929", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lorediakarkar", "iso_1_code": null, "iso_3_code": "lnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1930", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Atin", "iso_1_code": null, "iso_3_code": "plb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngen", "iso_1_code": null, "iso_3_code": "ssv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1932", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1928", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1925", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malekula Interior", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Labo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ninde", "iso_1_code": null, "iso_3_code": "mwi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1935", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1934", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malekula Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Neverver", "iso_1_code": null, "iso_3_code": "lgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1937", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Larevat", "iso_1_code": null, "iso_3_code": "lrv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1938", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Litzlitz", "iso_1_code": null, "iso_3_code": "lzl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1939", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maragus", "iso_1_code": null, "iso_3_code": "mrs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "V\u2019\u00ebnen Taut", "iso_1_code": null, "iso_3_code": "nmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nasarian", "iso_1_code": null, "iso_3_code": "nvh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Avava", "iso_1_code": null, "iso_3_code": "tmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1943", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Neve\u2019ei", "iso_1_code": null, "iso_3_code": "vnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1936", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Small Nambas", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dixon Reef", "iso_1_code": null, "iso_3_code": "dix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1946", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Letemboi", "iso_1_code": null, "iso_3_code": "nms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1947", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Repanbitip", "iso_1_code": null, "iso_3_code": "rpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1945", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1933", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northeast Vanuatu-Banks Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Efate, South", "iso_1_code": null, "iso_3_code": "erk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1951", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eton", "iso_1_code": null, "iso_3_code": "etn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1952", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Efate, North", "iso_1_code": null, "iso_3_code": "llp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1953", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lelepa", "iso_1_code": null, "iso_3_code": "lpa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1954", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Namakura", "iso_1_code": null, "iso_3_code": "nmk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1955", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1950", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Apma", "iso_1_code": null, "iso_3_code": "app", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1957", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Daakaka", "iso_1_code": null, "iso_3_code": "bpa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1958", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baetora", "iso_1_code": null, "iso_3_code": "btr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lonwolwol", "iso_1_code": null, "iso_3_code": "crc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fanbak", "iso_1_code": null, "iso_3_code": "fnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1961", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hiw", "iso_1_code": null, "iso_3_code": "hiw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1962", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koro", "iso_1_code": null, "iso_3_code": "krf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1963", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lo-Toga", "iso_1_code": null, "iso_3_code": "lht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1964", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lakon", "iso_1_code": null, "iso_3_code": "lkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1965", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hano", "iso_1_code": null, "iso_3_code": "lml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1966", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lemerig", "iso_1_code": null, "iso_3_code": "lrz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1967", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mwotlap", "iso_1_code": null, "iso_3_code": "mlv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1968", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambrym, North", "iso_1_code": null, "iso_3_code": "mmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marino", "iso_1_code": null, "iso_3_code": "mrb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mwerlap", "iso_1_code": null, "iso_3_code": "mrm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1971", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vur\u00ebs", "iso_1_code": null, "iso_3_code": "msn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1972", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mota", "iso_1_code": null, "iso_3_code": "mtt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1973", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maewo, Central", "iso_1_code": null, "iso_3_code": "mwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambae, West", "iso_1_code": null, "iso_3_code": "nnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1975", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Olrat", "iso_1_code": null, "iso_3_code": "olr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1976", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambae, East", "iso_1_code": null, "iso_3_code": "omb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1977", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Paama", "iso_1_code": null, "iso_3_code": "pma", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1978", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Port Vato", "iso_1_code": null, "iso_3_code": "ptv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1979", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sa", "iso_1_code": null, "iso_3_code": "sax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1980", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ske", "iso_1_code": null, "iso_3_code": "ske", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1981", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sowa", "iso_1_code": null, "iso_3_code": "sww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1982", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nume", "iso_1_code": null, "iso_3_code": "tgs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1983", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lehali", "iso_1_code": null, "iso_3_code": "tql", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambrym, Southeast", "iso_1_code": null, "iso_3_code": "tvk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1985", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "L\u00f6y\u00f6p", "iso_1_code": null, "iso_3_code": "urr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1986", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vera\u2019a", "iso_1_code": null, "iso_3_code": "vra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1987", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dorig", "iso_1_code": null, "iso_3_code": "wwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1988", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1956", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Epi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bieria-Maii", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bieria", "iso_1_code": null, "iso_3_code": "brj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1991", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maii", "iso_1_code": null, "iso_3_code": "mmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamenu-Baki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Baki-Bierebo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baki", "iso_1_code": null, "iso_3_code": "bki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1995", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bierebo", "iso_1_code": null, "iso_3_code": "bnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1996", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamenu-Lewo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lamenu", "iso_1_code": null, "iso_3_code": "lmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1998", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lewo", "iso_1_code": null, "iso_3_code": "lww", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1999", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1997", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1993", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malekula Coastal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Axamb", "iso_1_code": null, "iso_3_code": "ahb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2001", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aulua", "iso_1_code": null, "iso_3_code": "aul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2002", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maskelynes", "iso_1_code": null, "iso_3_code": "klv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2003", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malua Bay", "iso_1_code": null, "iso_3_code": "mll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Na\u2019ahai", "iso_1_code": null, "iso_3_code": "mlx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2005", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mae", "iso_1_code": null, "iso_3_code": "mme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2006", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mpotovoro", "iso_1_code": null, "iso_3_code": "mvt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2007", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unua", "iso_1_code": null, "iso_3_code": "onu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2008", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rerep", "iso_1_code": null, "iso_3_code": "pgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Port Sandwich", "iso_1_code": null, "iso_3_code": "psw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahavaq", "iso_1_code": null, "iso_3_code": "sns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2011", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uripiv-Wala-Rano-Atchin", "iso_1_code": null, "iso_3_code": "upv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2012", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Vao", "iso_1_code": null, "iso_3_code": "vao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2013", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Banam Bay", "iso_1_code": null, "iso_3_code": "vrt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2014", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Santo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Soro-n Raki", "iso_1_code": null, "iso_3_code": "akr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2016", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amblong", "iso_1_code": null, "iso_3_code": "alm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aore", "iso_1_code": null, "iso_3_code": "aor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiae", "iso_1_code": null, "iso_3_code": "frt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2019", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Merei", "iso_1_code": null, "iso_3_code": "lmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mafea", "iso_1_code": null, "iso_3_code": "mkv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2021", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malo", "iso_1_code": null, "iso_3_code": "mla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2022", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiale", "iso_1_code": null, "iso_3_code": "mnl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2023", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Morouas", "iso_1_code": null, "iso_3_code": "mrp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2024", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanokuku", "iso_1_code": null, "iso_3_code": "nkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2025", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Varsaf", "iso_1_code": null, "iso_3_code": "nrg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2026", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Balen", "iso_1_code": null, "iso_3_code": "nsw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2027", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tapiafaru", "iso_1_code": null, "iso_3_code": "ptr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2028", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mores", "iso_1_code": null, "iso_3_code": "rga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2029", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tangoa", "iso_1_code": null, "iso_3_code": "tgp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2030", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tolomako", "iso_1_code": null, "iso_3_code": "tlm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tambotalo", "iso_1_code": null, "iso_3_code": "tls", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vinekula", "iso_1_code": null, "iso_3_code": "tmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2033", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oa", "iso_1_code": null, "iso_3_code": "tmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2034", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Akei", "iso_1_code": null, "iso_3_code": "tsr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tavanlav", "iso_1_code": null, "iso_3_code": "vlp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tapesena", "iso_1_code": null, "iso_3_code": "vnp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2037", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moiso", "iso_1_code": null, "iso_3_code": "wlr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jo", "iso_1_code": null, "iso_3_code": "wsi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1924", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1780", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aneityum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aneityum", "iso_1_code": null, "iso_3_code": "aty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2042", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2041", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Erromanga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sie", "iso_1_code": null, "iso_3_code": "erg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2044", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ifo", "iso_1_code": null, "iso_3_code": "iff", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2045", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ura", "iso_1_code": null, "iso_3_code": "uur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2043", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tanna, Southwest", "iso_1_code": null, "iso_3_code": "nwi", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2048", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kwamera", "iso_1_code": null, "iso_3_code": "tnk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2049", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lenakel", "iso_1_code": null, "iso_3_code": "tnl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanna, North", "iso_1_code": null, "iso_3_code": "tnn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2051", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Whitesands", "iso_1_code": null, "iso_3_code": "tnp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2052", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2047", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2040", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeast Solomonic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gela-Guadalcanal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bughotu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bughotu", "iso_1_code": null, "iso_3_code": "bgt", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2056", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gela", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lengo", "iso_1_code": null, "iso_3_code": "lgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2058", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gela", "iso_1_code": null, "iso_3_code": "nlg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2059", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2057", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guadalcanal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Birao", "iso_1_code": null, "iso_3_code": "brr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2061", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ghari", "iso_1_code": null, "iso_3_code": "gri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2062", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malango", "iso_1_code": null, "iso_3_code": "mln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2063", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Talise", "iso_1_code": null, "iso_3_code": "tlr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2064", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malaita-San Cristobal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Malaita", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Longgu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Longgu", "iso_1_code": null, "iso_3_code": "lgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2068", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2067", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Baelelea", "iso_1_code": null, "iso_3_code": "bvc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2070", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Baeggu", "iso_1_code": null, "iso_3_code": "bvd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2071", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Fataleka", "iso_1_code": null, "iso_3_code": "far", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2072", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gula\u2019alaa", "iso_1_code": null, "iso_3_code": "gmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2073", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwaio", "iso_1_code": null, "iso_3_code": "kwd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2074", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kwara\u2019ae", "iso_1_code": null, "iso_3_code": "kwf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2075", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wala", "iso_1_code": null, "iso_3_code": "lgl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2076", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lau", "iso_1_code": null, "iso_3_code": "llu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2077", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "To\u2019abaita", "iso_1_code": null, "iso_3_code": "mlu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2078", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2069", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "\u2019Are\u2019are", "iso_1_code": null, "iso_3_code": "alu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2080", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sa\ua78ca", "iso_1_code": null, "iso_3_code": "apb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2081", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dori\u2019o", "iso_1_code": null, "iso_3_code": "dor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2082", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oroha", "iso_1_code": null, "iso_3_code": "ora", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2083", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2066", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "San Cristobal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kahua", "iso_1_code": null, "iso_3_code": "agw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2085", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Arosi", "iso_1_code": null, "iso_3_code": "aia", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2086", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bauro", "iso_1_code": null, "iso_3_code": "bxa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2087", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fagani", "iso_1_code": null, "iso_3_code": "faf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2088", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Owa", "iso_1_code": null, "iso_3_code": "stn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2089", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2065", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1779", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "St. Matthias", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mussau-Emira", "iso_1_code": null, "iso_3_code": "emi", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2091", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tenis", "iso_1_code": null, "iso_3_code": "tns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2092", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Temotu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Reefs-Santa Cruz", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "\u00c4iwoo", "iso_1_code": null, "iso_3_code": "nfl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2095", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Engdewu", "iso_1_code": null, "iso_3_code": "ngr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nal\u00f6go", "iso_1_code": null, "iso_3_code": "nlz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2097", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Noip\u00e4", "iso_1_code": null, "iso_3_code": "npx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2098", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nat\u00fcgu", "iso_1_code": null, "iso_3_code": "ntu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2099", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Utupua-Vanikoro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Teanu", "iso_1_code": null, "iso_3_code": "tkw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanema", "iso_1_code": null, "iso_3_code": "tnx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2102", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lovono", "iso_1_code": null, "iso_3_code": "vnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2103", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Utupua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asumboa", "iso_1_code": null, "iso_3_code": "aua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanibili", "iso_1_code": null, "iso_3_code": "tbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amba", "iso_1_code": null, "iso_3_code": "utp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2093", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Meso Melanesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bali-Vitu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Uneapa", "iso_1_code": null, "iso_3_code": "bbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2111", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vitu", "iso_1_code": null, "iso_3_code": "wiv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2112", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "New Ireland", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lavongai-Nalik", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tungag", "iso_1_code": null, "iso_3_code": "lcm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2115", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kara", "iso_1_code": null, "iso_3_code": "leu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2116", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lakurumau", "iso_1_code": null, "iso_3_code": "lxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nalik", "iso_1_code": null, "iso_3_code": "nal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandara", "iso_1_code": null, "iso_3_code": "tbf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiang", "iso_1_code": null, "iso_3_code": "tbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tigak", "iso_1_code": null, "iso_3_code": "tgc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Madak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Barok", "iso_1_code": null, "iso_3_code": "bjk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lavatbura-Lamusong", "iso_1_code": null, "iso_3_code": "lbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Madak", "iso_1_code": null, "iso_3_code": "mmx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2125", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South New Ireland-Northwest Solomonic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Minigir", "iso_1_code": null, "iso_3_code": "bxf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Choiseul", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Babatana", "iso_1_code": null, "iso_3_code": "baa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ririo", "iso_1_code": null, "iso_3_code": "rri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vaghua", "iso_1_code": null, "iso_3_code": "tva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Varisi", "iso_1_code": null, "iso_3_code": "vrs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mono-Uruava", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mono", "iso_1_code": null, "iso_3_code": "mte", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Torau", "iso_1_code": null, "iso_3_code": "ttu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uruava", "iso_1_code": null, "iso_3_code": "urv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2136", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vinitiri", "iso_1_code": null, "iso_3_code": "vmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2133", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nehan-North Bougainville", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Buka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Petats", "iso_1_code": null, "iso_3_code": "pex", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Halia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hak\u00f6", "iso_1_code": null, "iso_3_code": "hao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Halia", "iso_1_code": null, "iso_3_code": "hla", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2143", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2141", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2139", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nehan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nehan", "iso_1_code": null, "iso_3_code": "nsn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2145", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2144", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papapana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Papapana", "iso_1_code": null, "iso_3_code": "ppn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2147", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saposa-Tinputz", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hahon", "iso_1_code": null, "iso_3_code": "hah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2149", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saposa", "iso_1_code": null, "iso_3_code": "sps", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2150", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Teop", "iso_1_code": null, "iso_3_code": "tio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tinputz", "iso_1_code": null, "iso_3_code": "tpz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2152", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Solos", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Solos", "iso_1_code": null, "iso_3_code": "sol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2154", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2153", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "New Georgia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Vangunu", "iso_1_code": null, "iso_3_code": "mpr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marovo", "iso_1_code": null, "iso_3_code": "mvo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2156", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ghanongga", "iso_1_code": null, "iso_3_code": "ghn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2160", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hoava", "iso_1_code": null, "iso_3_code": "hoa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2161", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kusaghe", "iso_1_code": null, "iso_3_code": "ksg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kazukuru", "iso_1_code": null, "iso_3_code": "kzk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lungga", "iso_1_code": null, "iso_3_code": "lga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2164", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dughore", "iso_1_code": null, "iso_3_code": "nke", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roviana", "iso_1_code": null, "iso_3_code": "rug", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2166", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Simbo", "iso_1_code": null, "iso_3_code": "sbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2167", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ughele", "iso_1_code": null, "iso_3_code": "uge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Patpatar-Tolai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Fanamaket", "iso_1_code": null, "iso_3_code": "bjp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2170", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Patpatar", "iso_1_code": null, "iso_3_code": "gfk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2171", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guramalum", "iso_1_code": null, "iso_3_code": "grz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Niwer Mil", "iso_1_code": null, "iso_3_code": "hrc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2173", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warwar Feni", "iso_1_code": null, "iso_3_code": "hrw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konomala", "iso_1_code": null, "iso_3_code": "koa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2175", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kandas", "iso_1_code": null, "iso_3_code": "kqw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2176", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kuanua", "iso_1_code": null, "iso_3_code": "ksd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2177", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Label", "iso_1_code": null, "iso_3_code": "lbb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2178", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ramoaaina", "iso_1_code": null, "iso_3_code": "rai", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2179", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sursurunga", "iso_1_code": null, "iso_3_code": "sgz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2180", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Siar-Lak", "iso_1_code": null, "iso_3_code": "sjr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piva-Banoni", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bannoni", "iso_1_code": null, "iso_3_code": "bcm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lawunuia", "iso_1_code": null, "iso_3_code": "tgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2182", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Santa Isabel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Blablanga", "iso_1_code": null, "iso_3_code": "blp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zazao", "iso_1_code": null, "iso_3_code": "jaj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kokota", "iso_1_code": null, "iso_3_code": "kkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2186", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gao", "iso_1_code": null, "iso_3_code": "gga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cheke Holo", "iso_1_code": null, "iso_3_code": "mrn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zabana", "iso_1_code": null, "iso_3_code": "kji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laghu", "iso_1_code": null, "iso_3_code": "lgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2193", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tabar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lihir", "iso_1_code": null, "iso_3_code": "lih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Notsi", "iso_1_code": null, "iso_3_code": "ncf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2196", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tomoip", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tomoip", "iso_1_code": null, "iso_3_code": "tqp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2113", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Willaumez", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bulu", "iso_1_code": null, "iso_3_code": "bjl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2202", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bola", "iso_1_code": null, "iso_3_code": "bnp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2203", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Meramera", "iso_1_code": null, "iso_3_code": "mxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nakanai", "iso_1_code": null, "iso_3_code": "nak", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2205", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2109", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Huon Gulf", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Markham", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lower", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Busu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Duwet", "iso_1_code": null, "iso_3_code": "gve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aribwatsa", "iso_1_code": null, "iso_3_code": "laz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Musom", "iso_1_code": null, "iso_3_code": "msu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2213", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nafi", "iso_1_code": null, "iso_3_code": "srf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aribwaung", "iso_1_code": null, "iso_3_code": "ylu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Labu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Labu", "iso_1_code": null, "iso_3_code": "lbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2217", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2216", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wampar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wampar", "iso_1_code": null, "iso_3_code": "lbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2219", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Upper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Adzera", "iso_1_code": null, "iso_3_code": "adz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2221", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mountain", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mari", "iso_1_code": null, "iso_3_code": "hob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wampur", "iso_1_code": null, "iso_3_code": "waz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2224", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sarasira", "iso_1_code": null, "iso_3_code": "zsa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sukurum", "iso_1_code": null, "iso_3_code": "zsu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2220", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Watut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kodut, South", "iso_1_code": null, "iso_3_code": "mcy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kodut, Middle", "iso_1_code": null, "iso_3_code": "mpl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2229", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kodut, North", "iso_1_code": null, "iso_3_code": "una", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bugawac", "iso_1_code": null, "iso_3_code": "buk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2232", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yabem", "iso_1_code": null, "iso_3_code": "jae", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2233", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kala", "iso_1_code": null, "iso_3_code": "kcl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2234", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Numbami", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Numbami", "iso_1_code": null, "iso_3_code": "sij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hote-Buang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Buang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Buang, Central", "iso_1_code": null, "iso_3_code": "bzh", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2240", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Buang, Mangga", "iso_1_code": null, "iso_3_code": "mmo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2241", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Piu", "iso_1_code": null, "iso_3_code": "pix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2242", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kapin", "iso_1_code": null, "iso_3_code": "tbx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vehes", "iso_1_code": null, "iso_3_code": "val", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mumeng", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dambi", "iso_1_code": null, "iso_3_code": "dac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gorakor", "iso_1_code": null, "iso_3_code": "goc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kumalu", "iso_1_code": null, "iso_3_code": "ksl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Patep", "iso_1_code": null, "iso_3_code": "ptp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2249", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zenag", "iso_1_code": null, "iso_3_code": "zeg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hote", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Malei", "iso_1_code": null, "iso_3_code": "hot", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2252", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yamap", "iso_1_code": null, "iso_3_code": "ymp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2238", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaiwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Iwal", "iso_1_code": null, "iso_3_code": "kbm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2255", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngero-Vitiaz", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ngero", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bariai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bariai", "iso_1_code": null, "iso_3_code": "bch", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2259", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lusi", "iso_1_code": null, "iso_3_code": "khl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kove", "iso_1_code": null, "iso_3_code": "kvc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mala", "iso_1_code": null, "iso_3_code": "mmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gitua", "iso_1_code": null, "iso_3_code": "ggt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2264", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mutu", "iso_1_code": null, "iso_3_code": "tuc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2265", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "2263", - "scripts": [], - "own_tokenizer": false - } + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, + "node_i": "2263", + "native_tokenizers": [], + "scripts": [] + } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vitiaz", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Astrolabe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awad Bing", "iso_1_code": null, "iso_3_code": "bcu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2269", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mindiri", "iso_1_code": null, "iso_3_code": "mpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yote", "iso_1_code": null, "iso_3_code": "wab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2271", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear Bel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bilbil", "iso_1_code": null, "iso_3_code": "brz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gedaged", "iso_1_code": null, "iso_3_code": "gdd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2275", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matukar Panau", "iso_1_code": null, "iso_3_code": "mjk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Takia", "iso_1_code": null, "iso_3_code": "tbc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2277", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2273", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Marik", "iso_1_code": null, "iso_3_code": "dad", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2279", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kilenge-Maleu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maleu-Kilenge", "iso_1_code": null, "iso_3_code": "mgl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korap", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Arop-Lokep", "iso_1_code": null, "iso_3_code": "apr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2283", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Karnai", "iso_1_code": null, "iso_3_code": "bbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2284", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pano", "iso_1_code": null, "iso_3_code": "mqz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2285", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mur Pano", "iso_1_code": null, "iso_3_code": "tkv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangap-Mbula", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mbula", "iso_1_code": null, "iso_3_code": "mna", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2288", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2287", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mengen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mamusi", "iso_1_code": null, "iso_3_code": "kdf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2290", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mengen", "iso_1_code": null, "iso_3_code": "mee", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2291", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lote", "iso_1_code": null, "iso_3_code": "uvl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2292", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roinji-Nenaya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mato", "iso_1_code": null, "iso_3_code": "met", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2294", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Uma", "iso_1_code": null, "iso_3_code": "roe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sio", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sio", "iso_1_code": null, "iso_3_code": "xsi", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2297", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest New Britain", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Amara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amara", "iso_1_code": null, "iso_3_code": "aie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2299", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arawe-Pasismanua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Arawe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mangseng", "iso_1_code": null, "iso_3_code": "mbh", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2303", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "East Arawe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akolet", "iso_1_code": null, "iso_3_code": "akt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Avau", "iso_1_code": null, "iso_3_code": "avb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bebeli", "iso_1_code": null, "iso_3_code": "bek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2307", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amio-Gelimi", "iso_1_code": null, "iso_3_code": "let", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2308", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Arawe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Solong", "iso_1_code": null, "iso_3_code": "aaw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2310", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambul", "iso_1_code": null, "iso_3_code": "apo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2311", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gimi", "iso_1_code": null, "iso_3_code": "gip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aiklep", "iso_1_code": null, "iso_3_code": "mwg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pasismanua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aighon", "iso_1_code": null, "iso_3_code": "aix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2315", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miu", "iso_1_code": null, "iso_3_code": "mpo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaulong", "iso_1_code": null, "iso_3_code": "pss", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2317", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sengseng", "iso_1_code": null, "iso_3_code": "ssz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karore", "iso_1_code": null, "iso_3_code": "xkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2314", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2301", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bibling", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lamogai", "iso_1_code": null, "iso_3_code": "lmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mouk-Aria", "iso_1_code": null, "iso_3_code": "mwh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2298", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tami", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tami", "iso_1_code": null, "iso_3_code": "tmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sarmi-Jayapura Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jayapura Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayupulau", "iso_1_code": null, "iso_3_code": "kzu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ormu", "iso_1_code": null, "iso_3_code": "orz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tobati", "iso_1_code": null, "iso_3_code": "tti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sarmi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anus", "iso_1_code": null, "iso_3_code": "auq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bonggo", "iso_1_code": null, "iso_3_code": "bpg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masimasi", "iso_1_code": null, "iso_3_code": "ism", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaptiau", "iso_1_code": null, "iso_3_code": "kbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Liki", "iso_1_code": null, "iso_3_code": "lio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fedan", "iso_1_code": null, "iso_3_code": "pdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2336", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sobei", "iso_1_code": null, "iso_3_code": "sob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarpia", "iso_1_code": null, "iso_3_code": "tpf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mo", "iso_1_code": null, "iso_3_code": "wkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sunum", "iso_1_code": null, "iso_3_code": "ymn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yarsun", "iso_1_code": null, "iso_3_code": "yrs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2330", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Schouten", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kairiru-Manam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kairiru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaiep", "iso_1_code": null, "iso_3_code": "kbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2345", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kairiru", "iso_1_code": null, "iso_3_code": "kxa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Terebu", "iso_1_code": null, "iso_3_code": "trb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2344", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Biem", "iso_1_code": null, "iso_3_code": "bmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kis", "iso_1_code": null, "iso_3_code": "kis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Medebur", "iso_1_code": null, "iso_3_code": "mjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manam", "iso_1_code": null, "iso_3_code": "mva", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2352", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sepa", "iso_1_code": null, "iso_3_code": "spe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wogeo", "iso_1_code": null, "iso_3_code": "woc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Orop", "iso_1_code": null, "iso_3_code": "aps", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malol", "iso_1_code": null, "iso_3_code": "mbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sera", "iso_1_code": null, "iso_3_code": "sry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2358", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Essono", "iso_1_code": null, "iso_3_code": "sso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2359", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ulau-Suain", "iso_1_code": null, "iso_3_code": "svb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tumleo", "iso_1_code": null, "iso_3_code": "tmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kap", "iso_1_code": null, "iso_3_code": "ykm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papuan Tip", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Maisin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maisin", "iso_1_code": null, "iso_3_code": "mbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Papuan Mainland-D\u2019Entrecasteaux", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Anuki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Anuki", "iso_1_code": null, "iso_3_code": "aui", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2369", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Are-Taupota", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Are", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Miniafia Oyan", "iso_1_code": null, "iso_3_code": "aai", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2372", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ghayavi", "iso_1_code": null, "iso_3_code": "bmk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2373", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Doga", "iso_1_code": null, "iso_3_code": "dgg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2374", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Are", "iso_1_code": null, "iso_3_code": "mwc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2375", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gapapaiwa", "iso_1_code": null, "iso_3_code": "pwg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2376", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ubir", "iso_1_code": null, "iso_3_code": "ubr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2377", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kaninuwa", "iso_1_code": null, "iso_3_code": "wat", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2378", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taupota", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gweda", "iso_1_code": null, "iso_3_code": "grw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haigwai", "iso_1_code": null, "iso_3_code": "hgw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maiwala", "iso_1_code": null, "iso_3_code": "mum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minaveha", "iso_1_code": null, "iso_3_code": "mvn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2383", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tawala", "iso_1_code": null, "iso_3_code": "tbo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2384", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Taupota", "iso_1_code": null, "iso_3_code": "tpa", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2385", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wa\u2019ema", "iso_1_code": null, "iso_3_code": "wag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wedau", "iso_1_code": null, "iso_3_code": "wed", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2387", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yakaikeke", "iso_1_code": null, "iso_3_code": "ykk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2379", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2370", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bwaidoga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bwaidoka", "iso_1_code": null, "iso_3_code": "bwd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2390", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Goodenough, West", "iso_1_code": null, "iso_3_code": "ddi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koluwawa", "iso_1_code": null, "iso_3_code": "klx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Molima", "iso_1_code": null, "iso_3_code": "mox", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2393", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Maiadomu", "iso_1_code": null, "iso_3_code": "mzz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2394", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Iduna", "iso_1_code": null, "iso_3_code": "viv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2395", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Iamalele", "iso_1_code": null, "iso_3_code": "yml", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2396", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2389", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dobu-Duau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bunama", "iso_1_code": null, "iso_3_code": "bdd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2398", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Boselewa", "iso_1_code": null, "iso_3_code": "bwf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dobu", "iso_1_code": null, "iso_3_code": "dob", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2400", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Duau", "iso_1_code": null, "iso_3_code": "dva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2401", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Galeya", "iso_1_code": null, "iso_3_code": "gar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mwatebu", "iso_1_code": null, "iso_3_code": "mwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sewa Bay", "iso_1_code": null, "iso_3_code": "sew", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2404", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2397", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gumawana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gumawana", "iso_1_code": null, "iso_3_code": "gvs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kakabai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dawawa", "iso_1_code": null, "iso_3_code": "dww", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2408", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kakabai", "iso_1_code": null, "iso_3_code": "kqf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2409", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "2367", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Suauic", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "2367", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Suauic", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Buhutu", "iso_1_code": null, "iso_3_code": "bxh", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2411", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "\u2019Auhelawa", "iso_1_code": null, "iso_3_code": "kud", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2412", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Oya\u2019oya", "iso_1_code": null, "iso_3_code": "oyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saliba", "iso_1_code": null, "iso_3_code": "sbe", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2414", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Suau", "iso_1_code": null, "iso_3_code": "swp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2415", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bwanabwana", "iso_1_code": null, "iso_3_code": "tte", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2416", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Unubahe", "iso_1_code": null, "iso_3_code": "unu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wagawaga", "iso_1_code": null, "iso_3_code": "wgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaleba", "iso_1_code": null, "iso_3_code": "ylb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Peripheral", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Oumic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ouma", "iso_1_code": null, "iso_3_code": "oum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Magoric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bina", "iso_1_code": null, "iso_3_code": "bmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yoba", "iso_1_code": null, "iso_3_code": "yob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Magori", "iso_1_code": null, "iso_3_code": "zgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2422", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sinagoro-Keapara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Vula\u2019a", "iso_1_code": null, "iso_3_code": "hul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Keapara", "iso_1_code": null, "iso_3_code": "khz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2430", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Motu", "iso_1_code": null, "iso_3_code": "meu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2431", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sinaugoro", "iso_1_code": null, "iso_3_code": "snc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2432", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Central Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gabadi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abadi", "iso_1_code": null, "iso_3_code": "kbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Toura", "iso_1_code": null, "iso_3_code": "don", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuni", "iso_1_code": null, "iso_3_code": "kse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2438", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mekeo", "iso_1_code": null, "iso_3_code": "mek", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2439", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lala", "iso_1_code": null, "iso_3_code": "nrz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waima", "iso_1_code": null, "iso_3_code": "rro", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2441", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2421", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kilivila-Louisiades", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kilivila", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Budibud", "iso_1_code": null, "iso_3_code": "btp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kilivila", "iso_1_code": null, "iso_3_code": "kij", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2445", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Muyuw", "iso_1_code": null, "iso_3_code": "myw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2446", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Misima", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Misima-Panaeati", "iso_1_code": null, "iso_3_code": "mpx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2448", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nimoa-Sudest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rifao", "iso_1_code": null, "iso_3_code": "nmw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2450", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sudest", "iso_1_code": null, "iso_3_code": "tgo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2451", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2363", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2108", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yapese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Yapese", "iso_1_code": null, "iso_3_code": "yap", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2453", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Halmahera-West New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "South Halmahera", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Irarutu", "iso_1_code": null, "iso_3_code": "irh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Makian-Gane", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gane", "iso_1_code": null, "iso_3_code": "gzn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makian, East", "iso_1_code": null, "iso_3_code": "mky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2457", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Buli", "iso_1_code": null, "iso_3_code": "bzq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2461", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maba", "iso_1_code": null, "iso_3_code": "mqa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Patani", "iso_1_code": null, "iso_3_code": "ptn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sawai", "iso_1_code": null, "iso_3_code": "szw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bedoanas", "iso_1_code": null, "iso_3_code": "bed", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2467", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Erokwanas", "iso_1_code": null, "iso_3_code": "erw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cenderawasih Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Biakic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Biak", "iso_1_code": null, "iso_3_code": "bhw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2471", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dusner", "iso_1_code": null, "iso_3_code": "dsn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2472", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Meoswar", "iso_1_code": null, "iso_3_code": "mvx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iresim", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yeresiam", "iso_1_code": null, "iso_3_code": "ire", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2474", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mor", "iso_1_code": null, "iso_3_code": "mhz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2477", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Raja Ampat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "As", "iso_1_code": null, "iso_3_code": "asz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Biga", "iso_1_code": null, "iso_3_code": "bhc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gebe", "iso_1_code": null, "iso_3_code": "gei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kawe", "iso_1_code": null, "iso_3_code": "kgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Legenyem", "iso_1_code": null, "iso_3_code": "lcc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2483", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ma\u2019ya", "iso_1_code": null, "iso_3_code": "slz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambel", "iso_1_code": null, "iso_3_code": "wgo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wauyai", "iso_1_code": null, "iso_3_code": "wuy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2486", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matbat", "iso_1_code": null, "iso_3_code": "xmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2487", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Salawati", "iso_1_code": null, "iso_3_code": "xmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2478", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tandia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tandia", "iso_1_code": null, "iso_3_code": "tni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2490", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waropen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Waropen", "iso_1_code": null, "iso_3_code": "wrp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yapen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central-Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ambai", "iso_1_code": null, "iso_3_code": "amk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2495", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ansus", "iso_1_code": null, "iso_3_code": "and", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2496", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Busami", "iso_1_code": null, "iso_3_code": "bsm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2497", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munggui", "iso_1_code": null, "iso_3_code": "mth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2498", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marau", "iso_1_code": null, "iso_3_code": "mvr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2499", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pom", "iso_1_code": null, "iso_3_code": "pmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2500", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papuma", "iso_1_code": null, "iso_3_code": "ppm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2501", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roon", "iso_1_code": null, "iso_3_code": "rnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Serui-Laut", "iso_1_code": null, "iso_3_code": "seu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wamesa", "iso_1_code": null, "iso_3_code": "wad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2504", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Woi", "iso_1_code": null, "iso_3_code": "wbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2494", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kurudu", "iso_1_code": null, "iso_3_code": "kjr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wabo", "iso_1_code": null, "iso_3_code": "wbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yaur", "iso_1_code": null, "iso_3_code": "jau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yeretuar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yeretuar", "iso_1_code": null, "iso_3_code": "gop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2469", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hukumina", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hukumina", "iso_1_code": null, "iso_3_code": "huw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arguni", "iso_1_code": null, "iso_3_code": "agf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Onin", "iso_1_code": null, "iso_3_code": "oni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sekar", "iso_1_code": null, "iso_3_code": "skz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2518", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uruangnirin", "iso_1_code": null, "iso_3_code": "urn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2519", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kowiai", "iso_1_code": null, "iso_3_code": "kwh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeast Maluku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kei-Tanimbar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kei-Fordata", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Fordata", "iso_1_code": null, "iso_3_code": "frd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2525", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kei", "iso_1_code": null, "iso_3_code": "kei", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2526", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yamdena", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yamdena", "iso_1_code": null, "iso_3_code": "jmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2523", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Selaru", "iso_1_code": null, "iso_3_code": "slu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seluwasan", "iso_1_code": null, "iso_3_code": "sws", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2529", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sumba-Hawu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hawu-Dhao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hawu", "iso_1_code": null, "iso_3_code": "hvn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2534", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dhao", "iso_1_code": null, "iso_3_code": "nfa", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2535", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sumba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Anakalangu", "iso_1_code": null, "iso_3_code": "akg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2537", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kodi", "iso_1_code": null, "iso_3_code": "kod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2538", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamboya", "iso_1_code": null, "iso_3_code": "lmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loura", "iso_1_code": null, "iso_3_code": "lur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mamboru", "iso_1_code": null, "iso_3_code": "mvd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wejewa", "iso_1_code": null, "iso_3_code": "wew", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2542", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wanukaka", "iso_1_code": null, "iso_3_code": "wnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kambera", "iso_1_code": null, "iso_3_code": "xbr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2544", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2536", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teor-Kur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kur", "iso_1_code": null, "iso_3_code": "kuv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2546", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teor", "iso_1_code": null, "iso_3_code": "tev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Timor-Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nuclear Timor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Galolen", "iso_1_code": null, "iso_3_code": "gal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2550", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Habun", "iso_1_code": null, "iso_3_code": "hbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2551", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Helong", "iso_1_code": null, "iso_3_code": "heg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2552", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Idat\u00e9", "iso_1_code": null, "iso_3_code": "idt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2553", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kemak", "iso_1_code": null, "iso_3_code": "kem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kairui-Midiki", "iso_1_code": null, "iso_3_code": "krd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lakalei", "iso_1_code": null, "iso_3_code": "lka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2556", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makuva", "iso_1_code": null, "iso_3_code": "lva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mambae", "iso_1_code": null, "iso_3_code": "mgm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2558", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nauete", "iso_1_code": null, "iso_3_code": "nxa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2559", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetun", "iso_1_code": null, "iso_3_code": "tet", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2560", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tukudede", "iso_1_code": null, "iso_3_code": "tkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Welaun", "iso_1_code": null, "iso_3_code": "wlh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waima\u2019a", "iso_1_code": null, "iso_3_code": "wmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rote", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bilba", "iso_1_code": null, "iso_3_code": "bpz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2565", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dengka", "iso_1_code": null, "iso_3_code": "dnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2566", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lole", "iso_1_code": null, "iso_3_code": "llg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2567", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Rikou", "iso_1_code": null, "iso_3_code": "rgu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2568", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dela-Oenale", "iso_1_code": null, "iso_3_code": "row", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2569", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Termanu", "iso_1_code": null, "iso_3_code": "twu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2570", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tii", "iso_1_code": null, "iso_3_code": "txq", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2571", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2564", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uab Meto", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Amarasi", "iso_1_code": null, "iso_3_code": "aaz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2573", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Uab Meto", "iso_1_code": null, "iso_3_code": "aoz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2574", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Baikeno", "iso_1_code": null, "iso_3_code": "bkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2572", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest Maluku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Damar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Damar, East", "iso_1_code": null, "iso_3_code": "dmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2578", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2577", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kisar-Roma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kisar", "iso_1_code": null, "iso_3_code": "kje", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2580", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Roma", "iso_1_code": null, "iso_3_code": "rmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Luang", "iso_1_code": null, "iso_3_code": "lex", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2583", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Leti", "iso_1_code": null, "iso_3_code": "lti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2584", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2582", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teun-Nila-Serua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nila-Serua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nila", "iso_1_code": null, "iso_3_code": "nil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2587", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Serua", "iso_1_code": null, "iso_3_code": "srw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2586", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Te\u2019un", "iso_1_code": null, "iso_3_code": "tve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2589", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2585", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wetar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Atauran", "iso_1_code": null, "iso_3_code": "adb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aputai", "iso_1_code": null, "iso_3_code": "apx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2593", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ili\u2019uun", "iso_1_code": null, "iso_3_code": "ilu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tugun", "iso_1_code": null, "iso_3_code": "tzn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2595", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Perai", "iso_1_code": null, "iso_3_code": "wet", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2596", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2576", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2548", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kuri", "iso_1_code": null, "iso_3_code": "nbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2597", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Damar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Damar, West", "iso_1_code": null, "iso_3_code": "drn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1589", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chamorro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chamorro", "iso_1_code": "ch", "iso_3_code": "cha", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2602", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2601", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Greater Barito", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Barito-Mahakam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ampanang", "iso_1_code": null, "iso_3_code": "apg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2605", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunjung", "iso_1_code": null, "iso_3_code": "tjg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central-South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dusun Deyah", "iso_1_code": null, "iso_3_code": "dun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2610", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dusun Malang", "iso_1_code": null, "iso_3_code": "duq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dusun Witu", "iso_1_code": null, "iso_3_code": "duw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2613", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ma\u2019anyan", "iso_1_code": null, "iso_3_code": "mhy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2614", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Paku", "iso_1_code": null, "iso_3_code": "pku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2611", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2608", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Malagasy, Bara", "iso_1_code": "mg", "iso_3_code": "bhr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2617", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Northern Betsimisaraka", "iso_1_code": "mg", "iso_3_code": "bmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bushi", "iso_1_code": null, "iso_3_code": "buc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Southern Betsimisaraka", "iso_1_code": "mg", "iso_3_code": "bzc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Masikoro", "iso_1_code": "mg", "iso_3_code": "msh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Merina", "iso_1_code": "mg", "iso_3_code": "plt", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2622", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malagasy, Sakalava", "iso_1_code": "mg", "iso_3_code": "skg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2623", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malagasy, Tandroy-Mahafaly", "iso_1_code": "mg", "iso_3_code": "tdx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2624", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malagasy, Tesaka", "iso_1_code": "mg", "iso_3_code": "tkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Tanosy", "iso_1_code": "mg", "iso_3_code": "txy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Antankarana", "iso_1_code": "mg", "iso_3_code": "xmv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2627", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malagasy, Tsimihety", "iso_1_code": "mg", "iso_3_code": "xmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2616", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lawangan", "iso_1_code": null, "iso_3_code": "lbx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tawoyan", "iso_1_code": null, "iso_3_code": "twy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2629", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sama-Bajaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Yakan", "iso_1_code": null, "iso_3_code": "yka", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2633", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Abaknon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Inabaknon", "iso_1_code": null, "iso_3_code": "abx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2635", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sulu-Borneo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Borneo Coast Bajaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bajau, Indonesian", "iso_1_code": null, "iso_3_code": "bdl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2638", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bajau, West Coast", "iso_1_code": null, "iso_3_code": "bdr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mapun", "iso_1_code": null, "iso_3_code": "sjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2637", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inner Sulu Sama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sama, Central", "iso_1_code": null, "iso_3_code": "sml", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2642", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sama, Southern", "iso_1_code": null, "iso_3_code": "ssb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sama, Balangingih", "iso_1_code": null, "iso_3_code": "sse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Sulu Sama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sama, Pangutaran", "iso_1_code": null, "iso_3_code": "slm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2646", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kohin", "iso_1_code": null, "iso_3_code": "kkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ot Danum", "iso_1_code": null, "iso_3_code": "otd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2650", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Siang", "iso_1_code": null, "iso_3_code": "sya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bakumpai", "iso_1_code": null, "iso_3_code": "bkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngaju", "iso_1_code": null, "iso_3_code": "nij", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2654", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2652", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Greater Central Philippine", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central Philippine", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ata", "iso_1_code": null, "iso_3_code": "atm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayta, Sorsogon", "iso_1_code": null, "iso_3_code": "ays", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Binukidnon, Northern", "iso_1_code": null, "iso_3_code": "kyn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Binukidnon, Southern", "iso_1_code": null, "iso_3_code": "mtw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sulod", "iso_1_code": null, "iso_3_code": "srg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Coastal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Agta, Katubung", "iso_1_code": null, "iso_3_code": "agk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Agta, Mt. Iraya", "iso_1_code": null, "iso_3_code": "atl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol, Central", "iso_1_code": null, "iso_3_code": "bcl", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2667", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Virac", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bikol, Southern Catanduanes", "iso_1_code": null, "iso_3_code": "bln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inland", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Agta, Mt. Iriga", "iso_1_code": null, "iso_3_code": "agz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol, West Albay", "iso_1_code": null, "iso_3_code": "fbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol, Libon", "iso_1_code": null, "iso_3_code": "lbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol, Miraya", "iso_1_code": null, "iso_3_code": "rbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol, Buhi\u2019non", "iso_1_code": null, "iso_3_code": "ubl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iriga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bikol, Rinconada", "iso_1_code": null, "iso_3_code": "bto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pandan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bikol, Northern Catanduanes", "iso_1_code": null, "iso_3_code": "cts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bisayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Banton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bantoanon", "iso_1_code": null, "iso_3_code": "bno", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2682", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cebuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Cebuano", "iso_1_code": null, "iso_3_code": "ceb", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2684", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2683", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bantayanon", "iso_1_code": null, "iso_3_code": "bfx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2686", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Peripheral", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ati", "iso_1_code": null, "iso_3_code": "atk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2688", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Capiznon", "iso_1_code": null, "iso_3_code": "cps", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hiligaynon", "iso_1_code": null, "iso_3_code": "hil", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2690", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Masbatenyo", "iso_1_code": null, "iso_3_code": "msb", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2691", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Porohanon", "iso_1_code": null, "iso_3_code": "prh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2687", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Romblon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Romblomanon", "iso_1_code": null, "iso_3_code": "rol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Sorsoganon, Northern", "iso_1_code": null, "iso_3_code": "bks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baybayanon", "iso_1_code": null, "iso_3_code": "bvy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinabalian", "iso_1_code": null, "iso_3_code": "cbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gubat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sorsoganon, Southern", "iso_1_code": null, "iso_3_code": "srv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samar-Waray", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Waray-Waray", "iso_1_code": null, "iso_3_code": "war", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2702", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Surigaonon", "iso_1_code": null, "iso_3_code": "sgd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tandaganon", "iso_1_code": null, "iso_3_code": "tgn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Butuan-Tausug", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Butuanon", "iso_1_code": null, "iso_3_code": "btw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tausug", "iso_1_code": null, "iso_3_code": "tsg", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2708", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Caluyanun", "iso_1_code": null, "iso_3_code": "clu", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2710", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Aklan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aklanon", "iso_1_code": null, "iso_3_code": "akl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2712", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malaynon", "iso_1_code": null, "iso_3_code": "mlz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2711", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinarayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kinaray-a", "iso_1_code": null, "iso_3_code": "krj", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2715", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2714", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ratagnon", "iso_1_code": null, "iso_3_code": "btn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cuyonon", "iso_1_code": null, "iso_3_code": "cyo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2718", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Inonhan", "iso_1_code": null, "iso_3_code": "loc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2720", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2719", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mamanwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Minamanwa", "iso_1_code": null, "iso_3_code": "mmn", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2722", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2721", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mansakan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Davawenyo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Davawenyo", "iso_1_code": null, "iso_3_code": "daw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2725", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2724", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mandaya", "iso_1_code": null, "iso_3_code": "mry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mansaka", "iso_1_code": null, "iso_3_code": "msk", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2728", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2726", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamayo", "iso_1_code": null, "iso_3_code": "kyk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tagakolu", "iso_1_code": null, "iso_3_code": "klg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalagan, Kagan", "iso_1_code": null, "iso_3_code": "kll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalagan", "iso_1_code": null, "iso_3_code": "kqe", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2734", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2723", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tagalog", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Filipino", "iso_1_code": null, "iso_3_code": "fil", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2736", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": false + "scripts": [ + "Latn" + ] }, { "name": "Tagalog", "iso_1_code": "tl", "iso_3_code": "tgl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2737", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2656", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Danao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Magindanao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maguindanaon", "iso_1_code": null, "iso_3_code": "mdh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maranao-Iranon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Iranun", "iso_1_code": null, "iso_3_code": "ilm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iranun", "iso_1_code": null, "iso_3_code": "ilp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2743", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maranao", "iso_1_code": null, "iso_3_code": "mrw", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2744", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gorontalo-Mongondow", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gorontalic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bolango", "iso_1_code": null, "iso_3_code": "bld", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buol", "iso_1_code": null, "iso_3_code": "blf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bintauna", "iso_1_code": null, "iso_3_code": "bne", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gorontalo", "iso_1_code": null, "iso_3_code": "gor", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2750", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kaidipang", "iso_1_code": null, "iso_3_code": "kzp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lolak", "iso_1_code": null, "iso_3_code": "llq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suwawa", "iso_1_code": null, "iso_3_code": "swu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2746", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mongondowic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mongondow", "iso_1_code": null, "iso_3_code": "mog", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2755", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ponosakan", "iso_1_code": null, "iso_3_code": "pns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manobo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manobo, Dibabawon", "iso_1_code": null, "iso_3_code": "mbd", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2760", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manobo, Rajah Kabunsuwan", "iso_1_code": null, "iso_3_code": "mqk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2761", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manobo, Agusan", "iso_1_code": null, "iso_3_code": "msm", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2762", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2759", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ata-Tigwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manobo, Ata", "iso_1_code": null, "iso_3_code": "atd", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2765", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manobo, Matigsalug", "iso_1_code": null, "iso_3_code": "mbt", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2766", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Obo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manobo, Obo", "iso_1_code": null, "iso_3_code": "obo", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2768", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2767", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manobo, Western Bukidnon", "iso_1_code": null, "iso_3_code": "mbb", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2770", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manobo, Ilianen", "iso_1_code": null, "iso_3_code": "mbi", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2771", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "2758", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "North", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "2758", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "North", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Binukid", "iso_1_code": null, "iso_3_code": "bkd", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2773", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kagayanen", "iso_1_code": null, "iso_3_code": "cgc", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2774", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Higaonon", "iso_1_code": null, "iso_3_code": "mba", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manobo, Kinamiging", "iso_1_code": null, "iso_3_code": "mkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tagabawa", "iso_1_code": null, "iso_3_code": "bgs", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2778", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manobo, Sarangani", "iso_1_code": null, "iso_3_code": "mbs", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2779", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manobo, Cotabato", "iso_1_code": null, "iso_3_code": "mta", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2780", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2777", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palawanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bonggi", "iso_1_code": null, "iso_3_code": "bdg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Batak", "iso_1_code": null, "iso_3_code": "bya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palawano, Central", "iso_1_code": null, "iso_3_code": "plc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2784", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palawano, Southwest", "iso_1_code": null, "iso_3_code": "plv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palawano, Brooke\u2019s Point", "iso_1_code": null, "iso_3_code": "plw", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2786", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Molbog", "iso_1_code": null, "iso_3_code": "pwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tagbanwa", "iso_1_code": null, "iso_3_code": "tbw", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2788", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tagbanwa, Central", "iso_1_code": null, "iso_3_code": "tgt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Mangyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Buhid-Taubuid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Buhid", "iso_1_code": null, "iso_3_code": "bku", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2792", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bangon", "iso_1_code": null, "iso_3_code": "bnj", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2793", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tawbuid", "iso_1_code": null, "iso_3_code": "twb", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2794", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hanunoo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Hanunoo", "iso_1_code": null, "iso_3_code": "hnn", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2796", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subanon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Subanon, Western", "iso_1_code": null, "iso_3_code": "suc", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2798", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Subanen, Southern", "iso_1_code": null, "iso_3_code": "laa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2800", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subanen, Eastern", "iso_1_code": null, "iso_3_code": "sfe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subanon, Kolibugan", "iso_1_code": null, "iso_3_code": "skn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subanen, Northern", "iso_1_code": null, "iso_3_code": "stb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subanen, Central", "iso_1_code": null, "iso_3_code": "syb", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2804", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2797", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umiray Dumaget", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manide", "iso_1_code": null, "iso_3_code": "abd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2806", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Agta, Umiray Dumaget", "iso_1_code": null, "iso_3_code": "due", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2807", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Inagta Alabat", "iso_1_code": null, "iso_3_code": "dul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2808", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Javanese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Javanese, New Caledonian", "iso_1_code": null, "iso_3_code": "jas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Javanese", "iso_1_code": "jv", "iso_3_code": "jav", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2811", + "native_tokenizers": [], "scripts": [ "Latn", "Java" - ], - "own_tokenizer": false + ] }, { "name": "Javanese, Suriname", "iso_1_code": null, "iso_3_code": "jvn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2812", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Osing", "iso_1_code": null, "iso_3_code": "osi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2813", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tengger", "iso_1_code": null, "iso_3_code": "tes", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2814", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2809", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalamian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Agutaynen", "iso_1_code": null, "iso_3_code": "agn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2816", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tagbanwa, Calamian", "iso_1_code": null, "iso_3_code": "tbk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2817", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lampung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lampung Nyo", "iso_1_code": null, "iso_3_code": "abl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Komering", "iso_1_code": null, "iso_3_code": "kge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lampung Api", "iso_1_code": null, "iso_3_code": "ljp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2821", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Land Dayak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Benyadu\u2019", "iso_1_code": null, "iso_3_code": "byd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanggau", "iso_1_code": null, "iso_3_code": "scg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bakati\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bakati\u2019", "iso_1_code": null, "iso_3_code": "bei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bakati\u2019, Rara", "iso_1_code": null, "iso_3_code": "lra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bakati\u2019, Sara", "iso_1_code": null, "iso_3_code": "sre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2825", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bidayuh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Core", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bidayuh, Biatah", "iso_1_code": null, "iso_3_code": "bth", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2832", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sembaan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bidayuh, Tringgus-Sembaan", "iso_1_code": null, "iso_3_code": "trx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bidayuh, Bau", "iso_1_code": null, "iso_3_code": "sne", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2836", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bidayuh Serian", "iso_1_code": null, "iso_3_code": "sdo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2838", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Jangkang", "iso_1_code": null, "iso_3_code": "djo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Beginci", "iso_1_code": null, "iso_3_code": "ebc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2841", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gerai", "iso_1_code": null, "iso_3_code": "gef", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ribun", "iso_1_code": null, "iso_3_code": "rir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semandang", "iso_1_code": null, "iso_3_code": "sdq", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2844", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mateq", "iso_1_code": null, "iso_3_code": "xem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2822", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Madurese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kangean", "iso_1_code": null, "iso_3_code": "kkv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Madura", "iso_1_code": null, "iso_3_code": "mad", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2848", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malayo-Chamic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chamic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Acehnese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aceh", "iso_1_code": null, "iso_3_code": "ace", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2852", + "native_tokenizers": [], "scripts": [ - "Latn", - "Arab" - ], - "own_tokenizer": false + "Arab", + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Coastal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Haroi", "iso_1_code": null, "iso_3_code": "hro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cham", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cham, Western", "iso_1_code": null, "iso_3_code": "cja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cham, Eastern", "iso_1_code": null, "iso_3_code": "cjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2857", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2855", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2853", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Highlands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bih", "iso_1_code": null, "iso_3_code": "ibh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2859", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jarai", "iso_1_code": null, "iso_3_code": "jra", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2860", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Rade", "iso_1_code": null, "iso_3_code": "rad", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2861", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chru-Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chru", "iso_1_code": null, "iso_3_code": "cje", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Cham", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tsat", "iso_1_code": null, "iso_3_code": "huq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2865", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roglai, Southern", "iso_1_code": null, "iso_3_code": "rgs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2866", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roglai, Cacgia", "iso_1_code": null, "iso_3_code": "roc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roglai, Northern", "iso_1_code": null, "iso_3_code": "rog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2858", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malayic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Keninjal", "iso_1_code": null, "iso_3_code": "knl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kendayan", "iso_1_code": null, "iso_3_code": "knx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2871", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Urak Lawoi\u2019", "iso_1_code": "ms", "iso_3_code": "urk", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2872", + "native_tokenizers": [], "scripts": [ "Thai" - ], - "own_tokenizer": true + ] }, { "name": "Malayic Dayak", "iso_1_code": null, "iso_3_code": "xdy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2873", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ibanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Iban", "iso_1_code": null, "iso_3_code": "iba", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2875", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Remun", "iso_1_code": null, "iso_3_code": "lkj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mualang", "iso_1_code": null, "iso_3_code": "mtd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2877", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seberuang", "iso_1_code": null, "iso_3_code": "sbx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2878", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Banjar", "iso_1_code": "ms", "iso_3_code": "bjn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2880", + "native_tokenizers": [], "scripts": [ "Latn", "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Malay, Bacanese", "iso_1_code": "ms", "iso_3_code": "btj", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2881", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Berau", "iso_1_code": "ms", "iso_3_code": "bve", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2882", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Bukit", "iso_1_code": "ms", "iso_3_code": "bvu", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2883", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Duano", "iso_1_code": "ms", "iso_3_code": "dup", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2884", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Haji", "iso_1_code": "ms", "iso_3_code": "hji", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2885", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Indonesian", "iso_1_code": "id", "iso_3_code": "ind", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"id\")", "original_lang_name": "indonesian", "original_lang_code": "ind", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2886", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Jakun", "iso_1_code": "ms", "iso_3_code": "jak", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2887", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Jambi", "iso_1_code": "ms", "iso_3_code": "jax", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2888", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Kubu", "iso_1_code": "ms", "iso_3_code": "kvb", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2889", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Kerinci", "iso_1_code": "ms", "iso_3_code": "kvr", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2890", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Brunei", "iso_1_code": "ms", "iso_3_code": "kxd", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2891", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Sekak", "iso_1_code": "ms", "iso_3_code": "lce", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2892", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Lubu", "iso_1_code": "ms", "iso_3_code": "lcf", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2893", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Col", "iso_1_code": "ms", "iso_3_code": "liw", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2894", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Kedah", "iso_1_code": "ms", "iso_3_code": "meo", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2895", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Pattani", "iso_1_code": "ms", "iso_3_code": "mfa", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2896", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangka", "iso_1_code": "ms", "iso_3_code": "mfb", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2897", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Indonesian, Makassar", "iso_1_code": null, "iso_3_code": "mfp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minangkabau", "iso_1_code": "ms", "iso_3_code": "min", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2899", + "native_tokenizers": [], "scripts": [ "Latn", "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Malay, Kota Bangun Kutai", "iso_1_code": "ms", "iso_3_code": "mqg", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2900", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Sabah", "iso_1_code": "ms", "iso_3_code": "msi", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2901", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Musi", "iso_1_code": "ms", "iso_3_code": "mui", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2902", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Orang Kanaq", "iso_1_code": "ms", "iso_3_code": "orn", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2903", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Orang Seletar", "iso_1_code": "ms", "iso_3_code": "ors", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2904", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Pekal", "iso_1_code": "ms", "iso_3_code": "pel", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2905", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Central", "iso_1_code": "ms", "iso_3_code": "pse", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2906", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Temuan", "iso_1_code": "ms", "iso_3_code": "tmw", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2907", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaur", "iso_1_code": "ms", "iso_3_code": "vkk", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2908", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Tenggarong Kutai", "iso_1_code": "ms", "iso_3_code": "vkt", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2909", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay", "iso_1_code": "ms", "iso_3_code": "zlm", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2910", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Negeri Sembilan Malay", "iso_1_code": "ms", "iso_3_code": "zmi", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2911", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Standard", "iso_1_code": "ms", "iso_3_code": "zsm", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "children": [], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2912", + "native_tokenizers": [], "scripts": [ "Latn", "Arab" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2879", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minahasan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tonsawang", "iso_1_code": null, "iso_3_code": "tnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2914", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tontemboan", "iso_1_code": null, "iso_3_code": "tnt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northeast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tondano", "iso_1_code": null, "iso_3_code": "tdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tombulu", "iso_1_code": null, "iso_3_code": "tom", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tonsea", "iso_1_code": null, "iso_3_code": "txs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moklen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Moklen", "iso_1_code": null, "iso_3_code": "mkm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moken", "iso_1_code": null, "iso_3_code": "mwt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nasal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nasal", "iso_1_code": null, "iso_3_code": "nsy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2925", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2924", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Borneo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Melanau-Kajang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kajang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bukitan", "iso_1_code": null, "iso_3_code": "bkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2929", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kajaman", "iso_1_code": null, "iso_3_code": "kag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2930", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lahanan", "iso_1_code": null, "iso_3_code": "lhn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Batu", "iso_1_code": null, "iso_3_code": "pnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2932", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sekapan", "iso_1_code": null, "iso_3_code": "skp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2933", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sihan", "iso_1_code": null, "iso_3_code": "spg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2934", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ukit", "iso_1_code": null, "iso_3_code": "umi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2935", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2928", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Melanau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Melanau, Daro-Matu", "iso_1_code": null, "iso_3_code": "dro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2937", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Melanau, Kanowit-Tanjong", "iso_1_code": null, "iso_3_code": "kxn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2938", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Melanau, Central", "iso_1_code": null, "iso_3_code": "mel", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2939", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Melanau, Sibu", "iso_1_code": null, "iso_3_code": "sdx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2936", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Sarawakan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Berawan-Lower Baram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Berawan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Berawan, West", "iso_1_code": null, "iso_3_code": "zbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central-East Berawan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Berawan, Central", "iso_1_code": null, "iso_3_code": "zbc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2946", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Berawan, East", "iso_1_code": null, "iso_3_code": "zbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2947", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2945", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2943", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lower Baram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Belait", "iso_1_code": null, "iso_3_code": "beg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2951", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiput", "iso_1_code": null, "iso_3_code": "kyi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2952", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2950", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "B", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lelak", "iso_1_code": null, "iso_3_code": "llk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2954", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Narom", "iso_1_code": null, "iso_3_code": "nrm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2955", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tutong", "iso_1_code": null, "iso_3_code": "ttg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2956", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2953", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bintulu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Vaie", "iso_1_code": null, "iso_3_code": "bny", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2958", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2957", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dayic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kelabitic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kelabit", "iso_1_code": null, "iso_3_code": "kzi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2961", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lengilu", "iso_1_code": null, "iso_3_code": "lgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2962", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lundayeh", "iso_1_code": null, "iso_3_code": "lnd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2963", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Putoh", "iso_1_code": null, "iso_3_code": "put", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2964", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sa\u2019ban", "iso_1_code": null, "iso_3_code": "snv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2965", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tring", "iso_1_code": null, "iso_3_code": "tgq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2966", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan-Kenyah", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kayanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayan Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayan, Busang", "iso_1_code": null, "iso_3_code": "bfg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bahau", "iso_1_code": null, "iso_3_code": "bhv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2971", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan, Baram", "iso_1_code": null, "iso_3_code": "kys", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2972", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan, Rejang", "iso_1_code": null, "iso_3_code": "ree", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2973", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan, Wahau", "iso_1_code": null, "iso_3_code": "whu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan Mahakam", "iso_1_code": null, "iso_3_code": "xay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2975", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan, Mendalam", "iso_1_code": null, "iso_3_code": "xkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2976", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan, Kayan River", "iso_1_code": null, "iso_3_code": "xkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2977", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Modang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Modang", "iso_1_code": null, "iso_3_code": "mxd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2979", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Segai", "iso_1_code": null, "iso_3_code": "sge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2980", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2978", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muller-Schwaner \u2018Punan\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bukat", "iso_1_code": null, "iso_3_code": "bvk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2982", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hovongan", "iso_1_code": null, "iso_3_code": "hov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2983", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aoheng", "iso_1_code": null, "iso_3_code": "pni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Aput", "iso_1_code": null, "iso_3_code": "pud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2985", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Merah", "iso_1_code": null, "iso_3_code": "puf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2986", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kereho", "iso_1_code": null, "iso_3_code": "xke", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2987", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2981", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murik Kayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Murik", "iso_1_code": null, "iso_3_code": "mxr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2988", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2968", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kenyah", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kenyah, Mainstream", "iso_1_code": null, "iso_3_code": "xkl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2991", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayanic Kenyah", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sebop", "iso_1_code": null, "iso_3_code": "sib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2993", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Long Wat", "iso_1_code": null, "iso_3_code": "ttw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kenyah, Wahau", "iso_1_code": null, "iso_3_code": "whk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2995", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Upper Pujungan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Uma\u2019 Lung", "iso_1_code": null, "iso_3_code": "ulu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2997", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uma\u2019 Lasan", "iso_1_code": null, "iso_3_code": "xky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2998", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2996", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Penan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Penan, Eastern", "iso_1_code": null, "iso_3_code": "pez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Penan, Western", "iso_1_code": null, "iso_3_code": "pne", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3001", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2999", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2967", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Tubu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Punan Tubu", "iso_1_code": null, "iso_3_code": "puj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3003", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3002", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rejang-Sajau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Basap", "iso_1_code": null, "iso_3_code": "bdb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3005", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burusu", "iso_1_code": null, "iso_3_code": "bqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3006", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Bah-Biau", "iso_1_code": null, "iso_3_code": "pna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3007", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Merap", "iso_1_code": null, "iso_3_code": "puc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3008", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sajau Basap", "iso_1_code": null, "iso_3_code": "sjb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sabahan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dusunic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bisaya-Lotud", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bisaya, Sabah", "iso_1_code": null, "iso_3_code": "bsy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3013", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lotud", "iso_1_code": null, "iso_3_code": "dtr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3014", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bisaya, Brunei", "iso_1_code": null, "iso_3_code": "bsb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3016", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3012", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dusun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kuijau", "iso_1_code": null, "iso_3_code": "dkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rungus", "iso_1_code": null, "iso_3_code": "drg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3019", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kota Marudu Talantang", "iso_1_code": null, "iso_3_code": "grm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kimaragang", "iso_1_code": null, "iso_3_code": "kqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3021", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kadazan, Klias River", "iso_1_code": null, "iso_3_code": "kqt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3022", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tobilung", "iso_1_code": null, "iso_3_code": "tgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3023", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Central", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Central", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kadazan Dusun", "iso_1_code": null, "iso_3_code": "dtp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3025", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sugut Dusun", "iso_1_code": null, "iso_3_code": "kzs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3026", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minokok", "iso_1_code": null, "iso_3_code": "mqq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3027", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3024", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kadazan, Labuk-Kinabatangan", "iso_1_code": null, "iso_3_code": "dtb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3029", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3028", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dumpas", "iso_1_code": null, "iso_3_code": "dmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3030", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3011", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ida\u2019an", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ida\u2019an", "iso_1_code": null, "iso_3_code": "dbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3033", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murutic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Murut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Okolod", "iso_1_code": null, "iso_3_code": "kqv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Keningau", "iso_1_code": null, "iso_3_code": "kxi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3037", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Tahol", "iso_1_code": null, "iso_3_code": "mvv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Paluan", "iso_1_code": null, "iso_3_code": "plz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Selungai", "iso_1_code": null, "iso_3_code": "slg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3040", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Timugon", "iso_1_code": null, "iso_3_code": "tih", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3041", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Murut, Bookan", "iso_1_code": null, "iso_3_code": "bnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3043", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3042", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Murut, Kalabakan", "iso_1_code": null, "iso_3_code": "kve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3045", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Sembakung", "iso_1_code": null, "iso_3_code": "sbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3044", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tidung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tidung, Southern", "iso_1_code": null, "iso_3_code": "itd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3048", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tidung, Northern", "iso_1_code": null, "iso_3_code": "ntd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3049", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Serudung", "iso_1_code": null, "iso_3_code": "srk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3047", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Papar", "iso_1_code": null, "iso_3_code": "dpp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3052", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gana", "iso_1_code": null, "iso_3_code": "gnq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3051", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3034", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paitanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abai Sungai", "iso_1_code": null, "iso_3_code": "abf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tombonuo", "iso_1_code": null, "iso_3_code": "txa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3056", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Upper Kinabatangan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kinabatangan, Upper", "iso_1_code": null, "iso_3_code": "dmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3058", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lobu, Tampias", "iso_1_code": null, "iso_3_code": "low", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3059", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lobu, Lanas", "iso_1_code": null, "iso_3_code": "ruu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3057", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tatana", "iso_1_code": null, "iso_3_code": "txx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3062", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3061", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2926", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Mangyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Alangan", "iso_1_code": null, "iso_3_code": "alj", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3064", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Iraya", "iso_1_code": null, "iso_3_code": "iry", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3065", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tadyawan", "iso_1_code": null, "iso_3_code": "tdy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3066", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3063", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Luzon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Arta", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arta", "iso_1_code": null, "iso_3_code": "atz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3069", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3068", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ilocano", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ilocano", "iso_1_code": null, "iso_3_code": "ilo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3071", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "3070", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Meso-Cordilleran", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "3070", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Meso-Cordilleran", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Alta", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alta, Southern", "iso_1_code": null, "iso_3_code": "agy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3074", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alta, Northern", "iso_1_code": null, "iso_3_code": "aqn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3075", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3073", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South-Central Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Isinai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Isinay", "iso_1_code": null, "iso_3_code": "inn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3078", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Central Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kalinga-Itneg", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Itneg", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Itneg, Binongan", "iso_1_code": null, "iso_3_code": "itb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3083", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itneg, Inlaud", "iso_1_code": null, "iso_3_code": "iti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itneg, Maeng", "iso_1_code": null, "iso_3_code": "itt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3085", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itneg, Moyadan", "iso_1_code": null, "iso_3_code": "ity", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3086", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itneg, Masadiit", "iso_1_code": null, "iso_3_code": "tis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3087", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3082", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalinga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kalinga, Vanaw", "iso_1_code": null, "iso_3_code": "bjx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3089", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalinga, Mabaka Valley", "iso_1_code": null, "iso_3_code": "kkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalinga, Majukayang", "iso_1_code": null, "iso_3_code": "kmd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3091", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kalinga, Limos", "iso_1_code": null, "iso_3_code": "kmk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3092", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kalinga, Tanudan", "iso_1_code": null, "iso_3_code": "kml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3093", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalinga, Lubuagan", "iso_1_code": null, "iso_3_code": "knb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalinga, Southern", "iso_1_code": null, "iso_3_code": "ksc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3095", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kalinga, Butbut", "iso_1_code": null, "iso_3_code": "kyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3088", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3081", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Balangaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Balangao", "iso_1_code": null, "iso_3_code": "blw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3099", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3098", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bontok-Kankanay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bontok", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bontok, Eastern", "iso_1_code": null, "iso_3_code": "ebk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3102", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bontok, Central", "iso_1_code": null, "iso_3_code": "lbk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3103", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bontok, Southern", "iso_1_code": null, "iso_3_code": "obk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bontok, Northern", "iso_1_code": null, "iso_3_code": "rbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bontok, Southwestern", "iso_1_code": null, "iso_3_code": "vbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kankanay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kankanaey", "iso_1_code": null, "iso_3_code": "kne", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3108", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kankanay, Northern", "iso_1_code": null, "iso_3_code": "xnn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3109", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ifugaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ifugao, Amganad", "iso_1_code": null, "iso_3_code": "ifa", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3111", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ifugao, Batad", "iso_1_code": null, "iso_3_code": "ifb", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3112", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ifugao, Tuwali", "iso_1_code": null, "iso_3_code": "ifk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3113", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ifugao, Mayoyao", "iso_1_code": null, "iso_3_code": "ifu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3114", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3097", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3080", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3077", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ilongot", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bogkalot", "iso_1_code": null, "iso_3_code": "ilk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3116", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Southern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Pangasinan", "iso_1_code": null, "iso_3_code": "pag", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3119", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nuclear Southern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ibaloy", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ibaloi", "iso_1_code": null, "iso_3_code": "ibl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "I-wak", "iso_1_code": null, "iso_3_code": "iwk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kallahan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kallahan, Keley-i", "iso_1_code": null, "iso_3_code": "ify", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3125", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kalanguya", "iso_1_code": null, "iso_3_code": "kak", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3126", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karao", "iso_1_code": null, "iso_3_code": "kyj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3115", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3076", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3072", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Cagayan Valley", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ibanagic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Atta, Pudtol", "iso_1_code": null, "iso_3_code": "atp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Atta, Pamplona", "iso_1_code": null, "iso_3_code": "att", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3133", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Atta, Faire", "iso_1_code": null, "iso_3_code": "azt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ibanag", "iso_1_code": null, "iso_3_code": "ibg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3135", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Itawit", "iso_1_code": null, "iso_3_code": "itv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3136", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yogad", "iso_1_code": null, "iso_3_code": "yog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gaddangic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Agta, Central Cagayan", "iso_1_code": null, "iso_3_code": "agt", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3139", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gaddang", "iso_1_code": null, "iso_3_code": "gad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ga\ua78cdang", "iso_1_code": null, "iso_3_code": "gdg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3141", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Isnag", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Isnag", "iso_1_code": null, "iso_3_code": "isd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3143", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Adasen", "iso_1_code": null, "iso_3_code": "tiu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3144", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northeastern Luzon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Agta, Pahanan", "iso_1_code": null, "iso_3_code": "apf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paranan", "iso_1_code": null, "iso_3_code": "prf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3147", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Agta, Casiguran Dumagat", "iso_1_code": null, "iso_3_code": "dgc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3149", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Agta, Dupaninan", "iso_1_code": null, "iso_3_code": "duo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3150", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Agta, Dicamay", "iso_1_code": null, "iso_3_code": "duy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kasiguranin", "iso_1_code": null, "iso_3_code": "ksn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3152", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3145", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3067", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northwest Sumatra-Barrier Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Enggano", "iso_1_code": null, "iso_3_code": "eno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3154", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gayo", "iso_1_code": null, "iso_3_code": "gay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mentawai", "iso_1_code": null, "iso_3_code": "mwv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3156", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Simeulue", "iso_1_code": null, "iso_3_code": "smr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Batak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Batak Dairi", "iso_1_code": null, "iso_3_code": "btd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3160", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Batak Karo", "iso_1_code": null, "iso_3_code": "btx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3161", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Batak Alas-Kluet", "iso_1_code": null, "iso_3_code": "btz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Simalungan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Batak Simalungun", "iso_1_code": null, "iso_3_code": "bts", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3164", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Batak Angkola", "iso_1_code": null, "iso_3_code": "akb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3166", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Batak Toba", "iso_1_code": null, "iso_3_code": "bbc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3167", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Batak Mandailing", "iso_1_code": null, "iso_3_code": "btm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nias", "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, + "iso_3_code": null, "children": [ { "name": "Nias", "iso_1_code": null, "iso_3_code": "nia", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3170", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sikule", "iso_1_code": null, "iso_3_code": "skh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3153", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palauan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Palauan", "iso_1_code": null, "iso_3_code": "pau", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3173", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rejang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rejang", "iso_1_code": null, "iso_3_code": "rej", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3175", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sangiric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sangil", "iso_1_code": null, "iso_3_code": "snl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sangir", "iso_1_code": null, "iso_3_code": "sxn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3179", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Talaud", "iso_1_code": null, "iso_3_code": "tld", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3180", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bantik", "iso_1_code": null, "iso_3_code": "bnq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3182", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ratahan", "iso_1_code": null, "iso_3_code": "rth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3176", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Sulawesi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bugis", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bugis", "iso_1_code": null, "iso_3_code": "bug", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3186", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Koneq-koneq", "iso_1_code": null, "iso_3_code": "cml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Embaloh", "iso_1_code": null, "iso_3_code": "emb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taman", "iso_1_code": null, "iso_3_code": "tmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lemolang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Limola", "iso_1_code": null, "iso_3_code": "ley", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makassar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bentong", "iso_1_code": null, "iso_3_code": "bnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konjo, Coastal", "iso_1_code": null, "iso_3_code": "kjc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konjo, Highland", "iso_1_code": null, "iso_3_code": "kjk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3196", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makasar", "iso_1_code": null, "iso_3_code": "mak", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3197", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Selayar", "iso_1_code": null, "iso_3_code": "sly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3193", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mamuju", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mamuju", "iso_1_code": null, "iso_3_code": "mqx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mandar", "iso_1_code": null, "iso_3_code": "mdr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3203", - "scripts": [], - "own_tokenizer": false - } - ], - "node_i": "3202", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Masenrempulu", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "native_tokenizers": [], + "scripts": [] } - }, + ], + "tokenizers": {}, + "node_i": "3202", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Masenrempulu", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Malimpung", "iso_1_code": null, "iso_3_code": "mli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duri", "iso_1_code": null, "iso_3_code": "mvp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3206", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Enrekang", "iso_1_code": null, "iso_3_code": "ptt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maiwa", "iso_1_code": null, "iso_3_code": "wmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pitu Ulunna Salu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aralle-Tabulahan", "iso_1_code": null, "iso_3_code": "atq", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3210", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dakka", "iso_1_code": null, "iso_3_code": "dkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pannei", "iso_1_code": null, "iso_3_code": "pnc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bambam", "iso_1_code": null, "iso_3_code": "ptu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3213", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ulumanda\u2019", "iso_1_code": null, "iso_3_code": "ulm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toraja-Sa\u2019dan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kalumpang", "iso_1_code": null, "iso_3_code": "kli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3216", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mamasa", "iso_1_code": null, "iso_3_code": "mqj", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3217", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tae\u2019", "iso_1_code": null, "iso_3_code": "rob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toraja-Sa\u2019dan", "iso_1_code": null, "iso_3_code": "sda", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3219", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Talondo\u2019", "iso_1_code": null, "iso_3_code": "tln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3220", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seko", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Seko Tengah", "iso_1_code": null, "iso_3_code": "sko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seko Padang", "iso_1_code": null, "iso_3_code": "skx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Panasuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Budong-Budong", "iso_1_code": null, "iso_3_code": "bdx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Panasuan", "iso_1_code": null, "iso_3_code": "psn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3224", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3221", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sundanese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Badui", "iso_1_code": null, "iso_3_code": "bac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sunda", "iso_1_code": "su", "iso_3_code": "sun", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3229", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bulungan", "iso_1_code": null, "iso_3_code": "blj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gorap", "iso_1_code": null, "iso_3_code": "goq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3232", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northwest Formosan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pazeh", "iso_1_code": null, "iso_3_code": "pzh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3234", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulon", "iso_1_code": null, "iso_3_code": "uon", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saisiyat", "iso_1_code": null, "iso_3_code": "xsy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3233", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paiwan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Paiwan", "iso_1_code": null, "iso_3_code": "pwn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3238", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puyuma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Puyuma", "iso_1_code": null, "iso_3_code": "pyu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rukai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rukai", "iso_1_code": null, "iso_3_code": "dru", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3242", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3241", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsouic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Saaroa", "iso_1_code": null, "iso_3_code": "sxr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsou", "iso_1_code": null, "iso_3_code": "tsu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanakanabu", "iso_1_code": null, "iso_3_code": "xnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ketangalan", "iso_1_code": null, "iso_3_code": "kae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Plains", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central Western Plains", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Babuza", "iso_1_code": null, "iso_3_code": "bzg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papora-Hoanya", "iso_1_code": null, "iso_3_code": "ppu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3252", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Thao", "iso_1_code": null, "iso_3_code": "ssf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Aymaran.json b/data/Aymaran.json index 3f259765772f4bc9dd9a9ba67d875d0755e41437..2ccfa4a8043e7a8ee111777c90c4d5560b405065 100644 --- a/data/Aymaran.json +++ b/data/Aymaran.json @@ -2,64 +2,64 @@ "name": "Aymaran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aymara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aymara, Southern", "iso_1_code": "ay", "iso_3_code": "ayc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aymara, Central", "iso_1_code": "ay", "iso_3_code": "ayr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3258", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tupe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jaqaru", "iso_1_code": null, "iso_3_code": "jqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3259", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3255", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Barbacoan.json b/data/Barbacoan.json index e783f605709fa10c659a97bbc5ddfeaf6835ce3b..0002e9a5533cfe614a8f94b6c88c240945b6cde7 100644 --- a/data/Barbacoan.json +++ b/data/Barbacoan.json @@ -2,68 +2,68 @@ "name": "Barbacoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awa-Cuaiquer", "iso_1_code": null, "iso_3_code": "kwi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3263", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chachi", "iso_1_code": null, "iso_3_code": "cbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3265", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tsafiki", "iso_1_code": null, "iso_3_code": "cof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3266", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3264", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Bayono-Awbono.json b/data/Bayono-Awbono.json index 8da445249315bbd016c7711ff93e82a3c7857c51..5f49b9f9d1e92b70f2585239b04396553d3bd3b2 100644 --- a/data/Bayono-Awbono.json +++ b/data/Bayono-Awbono.json @@ -2,30 +2,30 @@ "name": "Bayono-Awbono", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awbono", "iso_1_code": null, "iso_3_code": "awh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bayono", "iso_1_code": null, "iso_3_code": "byl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3269", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Border.json b/data/Border.json index 8547621b8b8ff9764058ec3b7c3e843196cc371a..355a04670d30f82533d371732206d7a97910c587 100644 --- a/data/Border.json +++ b/data/Border.json @@ -2,197 +2,197 @@ "name": "Border", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bewani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ainbai", "iso_1_code": null, "iso_3_code": "aic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kilmeri", "iso_1_code": null, "iso_3_code": "kih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3273", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ningera", "iso_1_code": null, "iso_3_code": "nby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pagi", "iso_1_code": null, "iso_3_code": "pgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3275", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umeda", "iso_1_code": null, "iso_3_code": "upi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3271", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taikat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Taikat", "iso_1_code": null, "iso_3_code": "aos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awyi", "iso_1_code": null, "iso_3_code": "auw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3279", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3277", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waris", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amanab", "iso_1_code": null, "iso_3_code": "amn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3281", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Daonda", "iso_1_code": null, "iso_3_code": "dnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Imonda", "iso_1_code": null, "iso_3_code": "imn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3283", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manem", "iso_1_code": null, "iso_3_code": "jet", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3284", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Auwe", "iso_1_code": null, "iso_3_code": "smf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3285", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Viid", "iso_1_code": null, "iso_3_code": "snu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sowanda", "iso_1_code": null, "iso_3_code": "sow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3287", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waris", "iso_1_code": null, "iso_3_code": "wrs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3288", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Bororoan.json b/data/Bororoan.json index f2d1e7168aead9d439066d8dc684af2160f7d24a..28942fb24a4e010f1d629e0052e83e16045879a1 100644 --- a/data/Bororoan.json +++ b/data/Bororoan.json @@ -2,42 +2,42 @@ "name": "Bororoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bor\u00f4ro", "iso_1_code": null, "iso_3_code": "bor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3290", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Otuke", "iso_1_code": null, "iso_3_code": "otu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3291", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umot\u00edna", "iso_1_code": null, "iso_3_code": "umo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Botocudoan.json b/data/Botocudoan.json index 86bdd6205115b9d94617dfe2121aa73943284dcf..e368231dd56bab7b37a90befd85039d55ad6bfae 100644 --- a/data/Botocudoan.json +++ b/data/Botocudoan.json @@ -2,20 +2,20 @@ "name": "Botocudoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Krenak", "iso_1_code": null, "iso_3_code": "kqq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3294", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Caddoan.json b/data/Caddoan.json index fa6ddb4d47d7d3fc4aad44e8937d31c5a49518a5..a57ff5f351b993dc0bf6807bc545b581e7c5d10e 100644 --- a/data/Caddoan.json +++ b/data/Caddoan.json @@ -2,93 +2,93 @@ "name": "Caddoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Caddo", "iso_1_code": null, "iso_3_code": "cad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Caddoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wichita", "iso_1_code": null, "iso_3_code": "wic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3298", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kitsai-Proto-Pawnee", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kitsai", "iso_1_code": null, "iso_3_code": "kii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Proto-Pawnee", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arikara", "iso_1_code": null, "iso_3_code": "ari", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pawnee", "iso_1_code": null, "iso_3_code": "paw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3303", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3301", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3299", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3297", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Cahuapanan.json b/data/Cahuapanan.json index 8c4d970d29e042d809c0227f83349fa48ce61ec3..566ccd4384bf7fe8d51c2cd9ce17f0cda8363665 100644 --- a/data/Cahuapanan.json +++ b/data/Cahuapanan.json @@ -2,32 +2,32 @@ "name": "Cahuapanan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Shawi", "iso_1_code": null, "iso_3_code": "cbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3305", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Jebero", "iso_1_code": null, "iso_3_code": "jeb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Cariban.json b/data/Cariban.json index d9a8df9c09bd165d186c6ccc01f41675443ccb62..a5fa3d4cfcf77f27815dbd17c6331803910e30e3 100644 --- a/data/Cariban.json +++ b/data/Cariban.json @@ -2,569 +2,569 @@ "name": "Cariban", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Carib", "iso_1_code": null, "iso_3_code": "car", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3308", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Apala\u00ed", "iso_1_code": null, "iso_3_code": "apy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3310", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cuman\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chaima", "iso_1_code": null, "iso_3_code": "ciy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cumanagoto", "iso_1_code": null, "iso_3_code": "cuo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3311", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makiritare", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maquiritari", "iso_1_code": null, "iso_3_code": "mch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3315", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3314", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mapoyo-Yavarana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mapoyo", "iso_1_code": null, "iso_3_code": "mcg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3317", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "P\u00e9mono", "iso_1_code": null, "iso_3_code": "pev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamanaku", "iso_1_code": null, "iso_3_code": "tmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yabarana", "iso_1_code": null, "iso_3_code": "yar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wayana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaxui\u00e2na", "iso_1_code": null, "iso_3_code": "kbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wayana", "iso_1_code": null, "iso_3_code": "way", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3323", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kashuyana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sikiana", "iso_1_code": null, "iso_3_code": "sik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Amazonian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pem\u00f3n", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pem\u00f3n proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pemon", "iso_1_code": null, "iso_3_code": "aoc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Macushi", "iso_1_code": null, "iso_3_code": "mbc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3330", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kapong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akawaio", "iso_1_code": null, "iso_3_code": "ake", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3332", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Patamona", "iso_1_code": null, "iso_3_code": "pbc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3333", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yawaper\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Waimiri-Atroar\u00ed", "iso_1_code": null, "iso_3_code": "atr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Amazonian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "E\u2019\u00f1apa Woromaipu", "iso_1_code": null, "iso_3_code": "pbh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arara, Par\u00e1", "iso_1_code": null, "iso_3_code": "aap", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ikpeng", "iso_1_code": null, "iso_3_code": "txi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bakair\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bakair\u00ed", "iso_1_code": null, "iso_3_code": "bkq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3342", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Amonap", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kuik\u00faro-Kalap\u00e1lo", "iso_1_code": null, "iso_3_code": "kui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3344", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matipuhy", "iso_1_code": null, "iso_3_code": "mzo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3345", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3336", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiriy\u00f3", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Salum\u00e1", "iso_1_code": null, "iso_3_code": "slj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karihona", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Carijona", "iso_1_code": null, "iso_3_code": "cbd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiriy\u00f3", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akurio", "iso_1_code": null, "iso_3_code": "ako", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tri\u00f3", "iso_1_code": null, "iso_3_code": "tri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waiwai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hixkary\u00e1na", "iso_1_code": null, "iso_3_code": "hix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3354", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Waiwai", "iso_1_code": null, "iso_3_code": "waw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yukpa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yucpa-Yapreria", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Japreria", "iso_1_code": null, "iso_3_code": "jru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3358", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yukpa", "iso_1_code": null, "iso_3_code": "yup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3359", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3307", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Central Solomons.json b/data/Central Solomons.json index 94b78fb745810a113bf2c56e2d0efea2ede8381e..1c08122aa7c5423961dfaef8c8b0d386b553a33b 100644 --- a/data/Central Solomons.json +++ b/data/Central Solomons.json @@ -2,50 +2,50 @@ "name": "Central Solomons", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bilua", "iso_1_code": null, "iso_3_code": "blb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lavukaleve", "iso_1_code": null, "iso_3_code": "lvk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Savosavo", "iso_1_code": null, "iso_3_code": "svs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3363", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Touo", "iso_1_code": null, "iso_3_code": "tqu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Chapacuran.json b/data/Chapacuran.json index df9b38a2b14dc441a4c27f7eabdae1b2531421ab..c0332ebf64c1064e30f70f39ac84b775816925b3 100644 --- a/data/Chapacuran.json +++ b/data/Chapacuran.json @@ -2,72 +2,72 @@ "name": "Chapacuran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Itene", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Itene", "iso_1_code": null, "iso_3_code": "ite", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3367", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tor\u00e1", "iso_1_code": null, "iso_3_code": "trz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oro Win", "iso_1_code": null, "iso_3_code": "orw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3370", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paka\u00e1snovos", "iso_1_code": null, "iso_3_code": "pav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3369", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Chibchan.json b/data/Chibchan.json index 0b1c2e6029c6f28e03d6284286fa87b3dace0b3e..325a661d39c2007968aa2e8ce9e73df372bf722f 100644 --- a/data/Chibchan.json +++ b/data/Chibchan.json @@ -2,392 +2,392 @@ "name": "Chibchan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chibchan A", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boruca", "iso_1_code": null, "iso_3_code": "brn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3374", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teribe", "iso_1_code": null, "iso_3_code": "tfr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3375", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guaymi\u00edc", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ng\u00e4bere", "iso_1_code": null, "iso_3_code": "gym", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3377", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Buglere", "iso_1_code": null, "iso_3_code": "sab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3378", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3376", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Viceitic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bribri", "iso_1_code": null, "iso_3_code": "bzd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3380", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cab\u00e9car", "iso_1_code": null, "iso_3_code": "cjp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3381", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3379", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3373", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chibchan B", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pech", "iso_1_code": null, "iso_3_code": "pay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3383", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Chibchan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Colombian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern Colombian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chimila", "iso_1_code": null, "iso_3_code": "cbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3387", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arhuacan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kogi", "iso_1_code": null, "iso_3_code": "kog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3389", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Southern and Eastern Arhuacan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arhuaco", "iso_1_code": null, "iso_3_code": "arh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guamaca-Atanque", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sanka", "iso_1_code": null, "iso_3_code": "mbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3393", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3390", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Colombian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bar\u00ed", "iso_1_code": null, "iso_3_code": "mot", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3395", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cundicocuyese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chibcha", "iso_1_code": null, "iso_3_code": "chb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3397", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunebo, Barro Negro", "iso_1_code": null, "iso_3_code": "tbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunebo, Western", "iso_1_code": null, "iso_3_code": "tnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunebo, Angosturas", "iso_1_code": null, "iso_3_code": "tnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3400", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunebo, Central", "iso_1_code": null, "iso_3_code": "tuf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3401", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3396", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3394", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3385", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cuna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kuna, San Blas", "iso_1_code": null, "iso_3_code": "cuk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3403", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kuna, Border", "iso_1_code": null, "iso_3_code": "kvn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3404", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3384", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Votic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mal\u00e9ku Ja\u00edka", "iso_1_code": null, "iso_3_code": "gut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rama", "iso_1_code": null, "iso_3_code": "rma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3372", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Chimakuan.json b/data/Chimakuan.json index be5167ccff251c9324d9756490815ffde8bc711c..157318ab780469b3f52d927ccc05e2c21b37a9a4 100644 --- a/data/Chimakuan.json +++ b/data/Chimakuan.json @@ -2,30 +2,30 @@ "name": "Chimakuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quileute", "iso_1_code": null, "iso_3_code": "qui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3409", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chemakum", "iso_1_code": null, "iso_3_code": "xch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3408", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Chinookan.json b/data/Chinookan.json index 45bf668a32e57c7f9535f7eda9a4273178078855..bdac0a5b5f6181b5d0667c7de14c77b7475abb89 100644 --- a/data/Chinookan.json +++ b/data/Chinookan.json @@ -2,41 +2,41 @@ "name": "Chinookan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chinook", "iso_1_code": null, "iso_3_code": "chh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3412", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Upper Chinookan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wasco-Wishram", "iso_1_code": null, "iso_3_code": "wac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3414", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3411", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Chipaya-Uru.json b/data/Chipaya-Uru.json index 432e3f49fbbdd6a089ea5afe30cd0e2e08eefb97..e7b3fd48d37ea036b56c65f6446a57efda5b6171 100644 --- a/data/Chipaya-Uru.json +++ b/data/Chipaya-Uru.json @@ -2,32 +2,32 @@ "name": "Chipaya-Uru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chipaya", "iso_1_code": null, "iso_3_code": "cap", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3416", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Uru", "iso_1_code": null, "iso_3_code": "ure", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3415", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Chocoan.json b/data/Chocoan.json index 6546ac5ea5e79d67bce295c68c34530deb363711..cb49e5d1bda4fc5f4095f858b2a47e09a10aea2f 100644 --- a/data/Chocoan.json +++ b/data/Chocoan.json @@ -2,121 +2,121 @@ "name": "Chocoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Woun Meu", "iso_1_code": null, "iso_3_code": "noa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3419", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ember\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern Ember\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Embera Cat\u00edo", "iso_1_code": null, "iso_3_code": "cto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3422", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ember\u00e1, Northern", "iso_1_code": null, "iso_3_code": "emp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3423", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3421", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Ember\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Embera Baud\u00f3", "iso_1_code": null, "iso_3_code": "bdc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Embera Cham\u00ed", "iso_1_code": null, "iso_3_code": "cmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Epena", "iso_1_code": null, "iso_3_code": "sja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3427", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Embera Tad\u00f3", "iso_1_code": null, "iso_3_code": "tdc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Cholonan.json b/data/Cholonan.json index 0fbd7f4e703c1e7a1bc94944eaef5414c2247e46..63bc6f2b380ba07bf84933774845469914696331 100644 --- a/data/Cholonan.json +++ b/data/Cholonan.json @@ -2,30 +2,30 @@ "name": "Cholonan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chol\u00f3n", "iso_1_code": null, "iso_3_code": "cht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3430", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hibito", "iso_1_code": null, "iso_3_code": "hib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3431", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Chon.json b/data/Chon.json index 7b39b876e4ed8031ea35a46571439304ed356e8a..94c011ffa7119dc225643fd75293efb230b665e2 100644 --- a/data/Chon.json +++ b/data/Chon.json @@ -2,41 +2,41 @@ "name": "Chon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tehuelche", "iso_1_code": null, "iso_3_code": "teh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Island Chon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ona", "iso_1_code": null, "iso_3_code": "ona", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Chukotko-Kamchatkan.json b/data/Chukotko-Kamchatkan.json index b2d556d33fd441bc578e2fcbe45e77553dcc209e..0501f436cedd8d5a10ca94b72065642e3d6053f9 100644 --- a/data/Chukotko-Kamchatkan.json +++ b/data/Chukotko-Kamchatkan.json @@ -2,108 +2,108 @@ "name": "Chukotko-Kamchatkan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chukot", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chukchi", "iso_1_code": null, "iso_3_code": "ckt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3439", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3438", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koryak-Alyutor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alutor", "iso_1_code": null, "iso_3_code": "alr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koryak", "iso_1_code": null, "iso_3_code": "kpy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kerek", "iso_1_code": null, "iso_3_code": "krk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Itelmen", "iso_1_code": null, "iso_3_code": "itl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3445", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Chumashan.json b/data/Chumashan.json index 8321f978c2fddf4c26f6e85ae0e75dcda7c09e90..b0d15d3aa3e9bd94fa06029a1dea910b6b4d557b 100644 --- a/data/Chumashan.json +++ b/data/Chumashan.json @@ -2,92 +2,92 @@ "name": "Chumashan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Obispe\u00f1o", "iso_1_code": null, "iso_3_code": "obi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Chumash", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barbare\u00f1o", "iso_1_code": null, "iso_3_code": "boi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inese\u00f1o", "iso_1_code": null, "iso_3_code": "inz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3450", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Purisime\u00f1o", "iso_1_code": null, "iso_3_code": "puy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Venture\u00f1o", "iso_1_code": null, "iso_3_code": "veo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3448", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Island Chumash", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cruze\u00f1o", "iso_1_code": null, "iso_3_code": "crz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3453", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3446", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git "a/data/Cochim\303\255-Yuman.json" "b/data/Cochim\303\255-Yuman.json" index a3950f7f3003bffa07c77a2fd970bbe20870e871..e0803ab716f8780f2636cf82245bcafec2d223dd 100644 --- "a/data/Cochim\303\255-Yuman.json" +++ "b/data/Cochim\303\255-Yuman.json" @@ -2,155 +2,155 @@ "name": "Cochim\u00ed-Yuman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yuman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cochimi", "iso_1_code": null, "iso_3_code": "coj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3457", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiliwa", "iso_1_code": null, "iso_3_code": "klb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Delta-California", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cocopa", "iso_1_code": null, "iso_3_code": "coc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kumiai", "iso_1_code": null, "iso_3_code": "dih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3461", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Paipai", "iso_1_code": null, "iso_3_code": "ppi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Havasupai-Walapai-Yavapai", "iso_1_code": null, "iso_3_code": "yuf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "River", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mojave", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mohave", "iso_1_code": null, "iso_3_code": "mov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3467", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maricopa", "iso_1_code": null, "iso_3_code": "mrc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quechan", "iso_1_code": null, "iso_3_code": "yum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3469", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Comecrudan.json b/data/Comecrudan.json index 0a8d8f8bda6213548067b47c56dcadb6f6639453..ecb9e8296346a7f114bf400eba790790a03ed57a 100644 --- a/data/Comecrudan.json +++ b/data/Comecrudan.json @@ -2,60 +2,60 @@ "name": "Comecrudan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mamulique", "iso_1_code": null, "iso_3_code": "emm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3471", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Comecrudo", "iso_1_code": null, "iso_3_code": "xcm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3472", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cotoname", "iso_1_code": null, "iso_3_code": "xcn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Coahuilteco", "iso_1_code": null, "iso_3_code": "xcw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3474", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Garza", "iso_1_code": null, "iso_3_code": "xgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Constructed language.json b/data/Constructed language.json index 15bc6802ea845803952125e0de154b3b01931d9e..d051f96d89739cab19e7df98fdd4c4219d96ce8e 100644 --- a/data/Constructed language.json +++ b/data/Constructed language.json @@ -2,22 +2,22 @@ "name": "Constructed language", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Esperanto", "iso_1_code": "eo", "iso_3_code": "epo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3477", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Coosan.json b/data/Coosan.json index 65af07a7ad0b5d8a43616896ee43234bcdfff00d..9db5231a1ad205f528c5a669b32e3909ad2e9c87 100644 --- a/data/Coosan.json +++ b/data/Coosan.json @@ -2,30 +2,30 @@ "name": "Coosan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Coos", "iso_1_code": null, "iso_3_code": "csz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miluk", "iso_1_code": null, "iso_3_code": "iml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3478", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Creole.json b/data/Creole.json index 73e55ed458c91ab703952742ea3a86b283cb30c3..0aec317d1423ed8249c8c30d83ffd8522d21a8c9 100644 --- a/data/Creole.json +++ b/data/Creole.json @@ -2,2288 +2,1742 @@ "name": "Creole", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Afrikaans based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Flaaitaal", "iso_1_code": null, "iso_3_code": "fly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3483", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oorlams", "iso_1_code": null, "iso_3_code": "oor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nubi", "iso_1_code": null, "iso_3_code": "kcn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3486", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabic, Juba", "iso_1_code": "ar", "iso_3_code": "pga", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ar\")", - "original_lang_name": "arabic", - "original_lang_code": "ara", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "3487", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Assamese based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nagamese", "iso_1_code": null, "iso_3_code": "nag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dutch based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Berbice Dutch Creole", "iso_1_code": null, "iso_3_code": "brc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Negerhollands", "iso_1_code": null, "iso_3_code": "dcr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Javindo", "iso_1_code": null, "iso_3_code": "jvd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Petjo", "iso_1_code": null, "iso_3_code": "pey", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3494", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Skepi Dutch Creole", "iso_1_code": null, "iso_3_code": "skw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3495", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3490", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "English based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"pcm\")", - "original_lang_name": "nigerian_pidgin", - "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Saramaccan", "iso_1_code": null, "iso_3_code": "srm", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3497", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Atlantic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"pcm\")", - "original_lang_name": "nigerian_pidgin", - "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"pcm\")", - "original_lang_name": "nigerian_pidgin", - "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Turks and Caicos English Creole", "iso_1_code": null, "iso_3_code": "tch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3500", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"pcm\")", - "original_lang_name": "nigerian_pidgin", - "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Afro-Seminole Creole", "iso_1_code": null, "iso_3_code": "afs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bahamas English Creole", "iso_1_code": null, "iso_3_code": "bah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sea Island English Creole", "iso_1_code": null, "iso_3_code": "gul", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3504", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3501", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Leeward Caribbean English Creole", "iso_1_code": null, "iso_3_code": "aig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bajan", "iso_1_code": null, "iso_3_code": "bjs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Grenadian English Creole", "iso_1_code": null, "iso_3_code": "gcl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guyanese English Creole", "iso_1_code": null, "iso_3_code": "gyn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vincentian English Creole", "iso_1_code": null, "iso_3_code": "svc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tobagonian English Creole", "iso_1_code": null, "iso_3_code": "tgh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Trinidadian English Creole", "iso_1_code": null, "iso_3_code": "trf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Virgin Islands English Creole", "iso_1_code": null, "iso_3_code": "vic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "3499", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Krio", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "3499", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Krio", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Equatorial Guinean Pidgin", "iso_1_code": null, "iso_3_code": "fpe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ghanaian Pidgin English", "iso_1_code": null, "iso_3_code": "gpe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Krio", "iso_1_code": null, "iso_3_code": "kri", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3517", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pidgin, Nigerian", "iso_1_code": null, "iso_3_code": "pcm", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3518", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Pidgin, Cameroon", "iso_1_code": null, "iso_3_code": "wes", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3519", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "3514", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Suriname", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "3514", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Suriname", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Sranan Tongo", "iso_1_code": null, "iso_3_code": "srn", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3521", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ndyuka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"pcm\")", - "original_lang_name": "nigerian_pidgin", - "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Aukan", "iso_1_code": null, "iso_3_code": "djk", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3523", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kwinti", "iso_1_code": null, "iso_3_code": "kww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "3520", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Western", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "3520", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Western", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Belize English Creole", "iso_1_code": null, "iso_3_code": "bzj", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3526", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nicaragua English Creole", "iso_1_code": null, "iso_3_code": "bzk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Islander English Creole", "iso_1_code": null, "iso_3_code": "icr", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3528", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Jamaican English Creole", "iso_1_code": null, "iso_3_code": "jam", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3529", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "3498", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Pacific", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "3498", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Pacific", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bislama", "iso_1_code": "bi", "iso_3_code": "bis", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3531", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Hawaii Pidgin", "iso_1_code": null, "iso_3_code": "hwc", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3532", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngatik Men\u2019s Creole", "iso_1_code": null, "iso_3_code": "ngm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pitcairn-Norfolk", "iso_1_code": null, "iso_3_code": "pih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3534", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pijin", "iso_1_code": null, "iso_3_code": "pis", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3535", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kriol", "iso_1_code": null, "iso_3_code": "rop", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3536", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Torres Strait Creole", "iso_1_code": null, "iso_3_code": "tcs", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3537", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tok Pisin", "iso_1_code": null, "iso_3_code": "tpi", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3538", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3496", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "French based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lesser Antillean French Creole", "iso_1_code": null, "iso_3_code": "acf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3540", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tayo", "iso_1_code": null, "iso_3_code": "cks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seychelles French Creole", "iso_1_code": null, "iso_3_code": "crs", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3542", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guadeloupean French Creole", "iso_1_code": null, "iso_3_code": "gcf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3543", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guianese French Creole", "iso_1_code": null, "iso_3_code": "gcr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3544", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Haitian Creole", "iso_1_code": "ht", "iso_3_code": "hat", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3545", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Karipuna French Creole", "iso_1_code": null, "iso_3_code": "kmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3546", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Louisiana Creole", "iso_1_code": null, "iso_3_code": "lou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Morisyen", "iso_1_code": null, "iso_3_code": "mfe", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3548", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "R\u00e9union French Creole", "iso_1_code": null, "iso_3_code": "rcf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3549", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "San Miguel French Creole", "iso_1_code": null, "iso_3_code": "scf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3550", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "German based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Unserdeutsch", "iso_1_code": null, "iso_3_code": "uln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3551", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hindi based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Andaman Hindi Creole", "iso_1_code": null, "iso_3_code": "hca", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3553", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iberian based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Papiamentu", "iso_1_code": null, "iso_3_code": "pap", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3556", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Japanese-based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yilan Creole", "iso_1_code": null, "iso_3_code": "ycr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3558", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kongo based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kituba", "iso_1_code": null, "iso_3_code": "ktu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3560", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kituba", "iso_1_code": null, "iso_3_code": "mkw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3559", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Malay, Ambonese", "iso_1_code": null, "iso_3_code": "abs", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3563", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Betawi", "iso_1_code": null, "iso_3_code": "bew", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3564", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malay, Banda", "iso_1_code": null, "iso_3_code": "bpq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3565", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malaccan Malay Creole", "iso_1_code": null, "iso_3_code": "ccm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3566", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Cocos Islands", "iso_1_code": "ms", "iso_3_code": "coa", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "3567", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Larantuka", "iso_1_code": null, "iso_3_code": "lrt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3568", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, North Moluccan", "iso_1_code": "ms", "iso_3_code": "max", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3569", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Malay, Baba", "iso_1_code": null, "iso_3_code": "mbf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3570", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malay, Balinese", "iso_1_code": null, "iso_3_code": "mhp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3571", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Kupang", "iso_1_code": null, "iso_3_code": "mkn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3572", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Indonesian, Peranakan", "iso_1_code": null, "iso_3_code": "pea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3573", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Papuan", "iso_1_code": null, "iso_3_code": "pmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3574", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sri Lankan Malay Creole", "iso_1_code": null, "iso_3_code": "sci", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Manado", "iso_1_code": "ms", "iso_3_code": "xmm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3576", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngbandi based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sango", "iso_1_code": "sg", "iso_3_code": "sag", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3578", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sango, Riverain", "iso_1_code": null, "iso_3_code": "snj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3577", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Portuguese based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Angolar", "iso_1_code": null, "iso_3_code": "aoa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cafundo Creole", "iso_1_code": null, "iso_3_code": "ccd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3582", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "S\u00e3otomense", "iso_1_code": null, "iso_3_code": "cri", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3583", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Fa d\u2019Ambu", "iso_1_code": null, "iso_3_code": "fab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3584", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Indo-Portuguese", "iso_1_code": null, "iso_3_code": "idb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3585", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kabuverdianu", "iso_1_code": null, "iso_3_code": "kea", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3586", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malaccan Portuguese Creole", "iso_1_code": null, "iso_3_code": "mcm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3587", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Macanese", "iso_1_code": null, "iso_3_code": "mzs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guinea-Bissau Creole", "iso_1_code": null, "iso_3_code": "pov", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3589", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Principense", "iso_1_code": null, "iso_3_code": "pre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ternate\u00f1o", "iso_1_code": null, "iso_3_code": "tmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pidgin, Timor", "iso_1_code": null, "iso_3_code": "tvy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korlai Portuguese Creole", "iso_1_code": null, "iso_3_code": "vkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3593", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3580", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Spanish based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chavacano", "iso_1_code": null, "iso_3_code": "cbk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3595", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Palenquero", "iso_1_code": null, "iso_3_code": "pln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3596", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Swahili based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cutchi-Swahili", "iso_1_code": null, "iso_3_code": "ccl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3597", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetun based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tetun Dili", "iso_1_code": null, "iso_3_code": "tdt", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3600", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"pcm\")", + "original_lang_name": "nigerian_pidgin", + "original_lang_code": "pcm", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Dravidian.json b/data/Dravidian.json index bf352795e9834e5ff51309cd3007052a41733899..3c16d782148a636cabf8d8c0cb4b7399e11ccbda 100644 --- a/data/Dravidian.json +++ b/data/Dravidian.json @@ -2,1765 +2,1436 @@ "name": "Dravidian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Knda": { - "full_object": "SpaCyTokenizer(\"kn\")", - "original_lang_name": "kannada", - "original_lang_code": "kan", - "scripts": [ - "Latn", - "Knda" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Mlym": { - "full_object": "SpaCyTokenizer(\"ml\")", - "original_lang_name": "malayalam", - "original_lang_code": "mal", - "scripts": [ - "Latn", - "Mlym" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Taml": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kolami-Naiki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kolami, Northwestern", "iso_1_code": null, "iso_3_code": "kfb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kolami, Southeastern", "iso_1_code": null, "iso_3_code": "nit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3605", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Parji-Gadaba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gadaba, Mudhili", "iso_1_code": null, "iso_3_code": "gau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gadaba, Pottangi Ollar", "iso_1_code": null, "iso_3_code": "gdb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3608", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duruwa", "iso_1_code": null, "iso_3_code": "pci", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3602", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Brahui", "iso_1_code": null, "iso_3_code": "brh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3611", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Kumarbhag Paharia", "iso_1_code": null, "iso_3_code": "kmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurux", "iso_1_code": null, "iso_3_code": "kru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3613", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Sauria Paharia", "iso_1_code": null, "iso_3_code": "mjt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3614", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kisan", "iso_1_code": null, "iso_3_code": "xis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3610", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gondi-Kui", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gondi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Maria, Dandami", "iso_1_code": null, "iso_3_code": "daq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muria, Eastern", "iso_1_code": null, "iso_3_code": "emu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gondi, Aheri", "iso_1_code": null, "iso_3_code": "esg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muria, Far Western", "iso_1_code": null, "iso_3_code": "fmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3622", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Gondi, Northern", "iso_1_code": null, "iso_3_code": "gno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3623", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khirwar", "iso_1_code": null, "iso_3_code": "kwx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3624", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maria", "iso_1_code": null, "iso_3_code": "mrr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muria, Western", "iso_1_code": null, "iso_3_code": "mut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nagarchal", "iso_1_code": null, "iso_3_code": "nbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3627", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pardhan", "iso_1_code": null, "iso_3_code": "pch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gondi, Adilabad", "iso_1_code": null, "iso_3_code": "wsg", + "children": [], "tokenizers": { "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", + "full_object": "IndicNLPTokenizer(\"te\")", "original_lang_name": "telugu", "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Telu", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "3629", + "native_tokenizers": [], "scripts": [ "Telu" - ], - "own_tokenizer": false + ] } ], - "node_i": "3618", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Konda-Kui", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"te\")", + "full_object": "IndicNLPTokenizer(\"te\")", "original_lang_name": "telugu", "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Telu", + "class_name": "IndicNLPTokenizer" } }, + "node_i": "3618", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Konda-Kui", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Konda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Konda-Dora", "iso_1_code": null, "iso_3_code": "kfc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mukha-Dora", "iso_1_code": null, "iso_3_code": "mmk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3633", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manda-Kui", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kui-Kuvi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kui, Dawik", "iso_1_code": null, "iso_3_code": "dwk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koya", "iso_1_code": null, "iso_3_code": "kff", + "children": [], "tokenizers": { "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", + "full_object": "IndicNLPTokenizer(\"te\")", "original_lang_name": "telugu", "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Telu", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "3637", + "native_tokenizers": [], "scripts": [ "Telu" - ], - "own_tokenizer": false + ] }, { "name": "Kuvi", "iso_1_code": null, "iso_3_code": "kxv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3638", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kui", "iso_1_code": null, "iso_3_code": "uki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Telu": { + "full_object": "IndicNLPTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "script": "Telu", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manda-Pengo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Manda", "iso_1_code": null, "iso_3_code": "mha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pengo", "iso_1_code": null, "iso_3_code": "peg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3642", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Telu": { + "full_object": "IndicNLPTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "script": "Telu", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Telu": { + "full_object": "IndicNLPTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "script": "Telu", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "3617", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Telugu", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", + "full_object": "IndicNLPTokenizer(\"te\")", "original_lang_name": "telugu", "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Telu", + "class_name": "IndicNLPTokenizer" } }, + "node_i": "3617", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Telugu", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Chenchu", "iso_1_code": null, "iso_3_code": "cde", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manna-Dora", "iso_1_code": null, "iso_3_code": "mju", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Telugu", "iso_1_code": "te", "iso_3_code": "tel", + "children": [], "tokenizers": { "Telu": { - "full_object": "SpaCyTokenizer(\"te\")", - "original_lang_name": "telugu", - "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"te\")", + "full_object": "IndicNLPTokenizer(\"te\")", "original_lang_name": "telugu", "original_lang_code": "tel", - "scripts": [ - "Telu", - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Telu", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "3646", + "native_tokenizers": [ + "Telu" + ], "scripts": [ "Telu", "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Waddar", "iso_1_code": null, "iso_3_code": "wbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Telu": { + "full_object": "IndicNLPTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "script": "Telu", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Telu": { + "full_object": "IndicNLPTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "script": "Telu", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3616", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Knda": { - "full_object": "SpaCyTokenizer(\"kn\")", - "original_lang_name": "kannada", - "original_lang_code": "kan", - "scripts": [ - "Latn", - "Knda" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Mlym": { - "full_object": "SpaCyTokenizer(\"ml\")", - "original_lang_name": "malayalam", - "original_lang_code": "mal", - "scripts": [ - "Latn", - "Mlym" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Taml": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kurichiya", "iso_1_code": null, "iso_3_code": "kfh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurumba, Attapady", "iso_1_code": null, "iso_3_code": "pkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3650", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pathiya", "iso_1_code": null, "iso_3_code": "pty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muduga", "iso_1_code": null, "iso_3_code": "udg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3652", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kumbaran", "iso_1_code": null, "iso_3_code": "wkb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalanadi", "iso_1_code": null, "iso_3_code": "wkl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kunduvadi", "iso_1_code": null, "iso_3_code": "wku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamil-Kannada", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Knda": { - "full_object": "SpaCyTokenizer(\"kn\")", - "original_lang_name": "kannada", - "original_lang_code": "kan", - "scripts": [ - "Latn", - "Knda" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Mlym": { - "full_object": "SpaCyTokenizer(\"ml\")", - "original_lang_name": "malayalam", - "original_lang_code": "mal", - "scripts": [ - "Latn", - "Mlym" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Taml": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kannada", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"kn\")", - "original_lang_name": "kannada", - "original_lang_code": "kan", - "scripts": [ - "Latn", - "Knda" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Knda": { - "full_object": "SpaCyTokenizer(\"kn\")", - "original_lang_name": "kannada", - "original_lang_code": "kan", - "scripts": [ - "Latn", - "Knda" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Badaga", "iso_1_code": null, "iso_3_code": "bfq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Holiya", "iso_1_code": null, "iso_3_code": "hoy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kannada", "iso_1_code": "kn", "iso_3_code": "kan", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"kn\")", - "original_lang_name": "kannada", - "original_lang_code": "kan", - "scripts": [ - "Latn", - "Knda" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Knda": { - "full_object": "SpaCyTokenizer(\"kn\")", + "full_object": "IndicNLPTokenizer(\"kn\")", "original_lang_name": "kannada", "original_lang_code": "kan", - "scripts": [ - "Latn", - "Knda" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Knda", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "3660", + "native_tokenizers": [ + "Knda" + ], "scripts": [ "Latn", "Knda" - ], - "own_tokenizer": true + ] }, { "name": "Urali", "iso_1_code": null, "iso_3_code": "url", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "3657", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Tamil-Kodagu", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Mlym": { - "full_object": "SpaCyTokenizer(\"ml\")", - "original_lang_name": "malayalam", - "original_lang_code": "mal", - "scripts": [ - "Latn", - "Mlym" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Taml": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "Knda": { + "full_object": "IndicNLPTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "script": "Knda", + "class_name": "IndicNLPTokenizer" } }, + "node_i": "3657", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Tamil-Kodagu", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kodagu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kodava", "iso_1_code": null, "iso_3_code": "kfa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurumba, Kannada", "iso_1_code": null, "iso_3_code": "kfi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurumba, Mullu", "iso_1_code": null, "iso_3_code": "kpb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurumba, Alu", "iso_1_code": null, "iso_3_code": "xua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurumba, Jennu", "iso_1_code": null, "iso_3_code": "xuj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamil-Malayalam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Mlym": { - "full_object": "SpaCyTokenizer(\"ml\")", - "original_lang_name": "malayalam", - "original_lang_code": "mal", - "scripts": [ - "Latn", - "Mlym" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Taml": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mannan", "iso_1_code": null, "iso_3_code": "mjv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malayalam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ml\")", - "original_lang_name": "malayalam", - "original_lang_code": "mal", - "scripts": [ - "Latn", - "Mlym" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Mlym": { - "full_object": "SpaCyTokenizer(\"ml\")", - "original_lang_name": "malayalam", - "original_lang_code": "mal", - "scripts": [ - "Latn", - "Mlym" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Aranadan", "iso_1_code": null, "iso_3_code": "aaf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kadar", "iso_1_code": null, "iso_3_code": "kej", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malayalam", "iso_1_code": "ml", "iso_3_code": "mal", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ml\")", - "original_lang_name": "malayalam", - "original_lang_code": "mal", - "scripts": [ - "Latn", - "Mlym" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Mlym": { - "full_object": "SpaCyTokenizer(\"ml\")", + "full_object": "IndicNLPTokenizer(\"ml\")", "original_lang_name": "malayalam", "original_lang_code": "mal", - "scripts": [ - "Latn", - "Mlym" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Mlym", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "3674", + "native_tokenizers": [ + "Mlym" + ], "scripts": [ "Latn", "Mlym" - ], - "own_tokenizer": true + ] }, { "name": "Malapandaram", "iso_1_code": null, "iso_3_code": "mjp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malaryan", "iso_1_code": null, "iso_3_code": "mjq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malavedan", "iso_1_code": null, "iso_3_code": "mjr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paliyan", "iso_1_code": null, "iso_3_code": "pcf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paniya", "iso_1_code": null, "iso_3_code": "pcg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ravula", "iso_1_code": null, "iso_3_code": "yea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Mlym": { + "full_object": "IndicNLPTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "script": "Mlym", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamil", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Taml": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eravallan", "iso_1_code": null, "iso_3_code": "era", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3682", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Irula", "iso_1_code": null, "iso_3_code": "iru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3683", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaikadi", "iso_1_code": null, "iso_3_code": "kep", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3684", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanikkaran", "iso_1_code": null, "iso_3_code": "kev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muthuvan", "iso_1_code": null, "iso_3_code": "muv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3686", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sholaga", "iso_1_code": null, "iso_3_code": "sle", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3687", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamil", "iso_1_code": "ta", "iso_3_code": "tam", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ta\")", - "original_lang_name": "tamil", - "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Taml": { - "full_object": "SpaCyTokenizer(\"ta\")", + "full_object": "IndicNLPTokenizer(\"ta\")", "original_lang_name": "tamil", "original_lang_code": "tam", - "scripts": [ - "Latn", - "Taml" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Taml", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "3688", + "native_tokenizers": [ + "Taml" + ], "scripts": [ "Taml", "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Kurumba, Betta", "iso_1_code": null, "iso_3_code": "xub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yerukula", "iso_1_code": null, "iso_3_code": "yeu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3690", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Taml": { + "full_object": "IndicNLPTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "script": "Taml", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Mlym": { + "full_object": "IndicNLPTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "script": "Mlym", + "class_name": "IndicNLPTokenizer" + }, + "Taml": { + "full_object": "IndicNLPTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "script": "Taml", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toda-Kota", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kota", "iso_1_code": null, "iso_3_code": "kfe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toda", "iso_1_code": null, "iso_3_code": "tcx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3691", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Mlym": { + "full_object": "IndicNLPTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "script": "Mlym", + "class_name": "IndicNLPTokenizer" + }, + "Taml": { + "full_object": "IndicNLPTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "script": "Taml", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chetti, Wayanad", "iso_1_code": null, "iso_3_code": "ctt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "3656", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Tulu", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Knda": { - "full_object": "SpaCyTokenizer(\"kn\")", + "full_object": "IndicNLPTokenizer(\"kn\")", "original_lang_name": "kannada", "original_lang_code": "kan", - "scripts": [ - "Latn", - "Knda" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Knda", + "class_name": "IndicNLPTokenizer" + }, + "Mlym": { + "full_object": "IndicNLPTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "script": "Mlym", + "class_name": "IndicNLPTokenizer" + }, + "Taml": { + "full_object": "IndicNLPTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "script": "Taml", + "class_name": "IndicNLPTokenizer" } }, + "node_i": "3656", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Tulu", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bellari", "iso_1_code": null, "iso_3_code": "brw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kudiya", "iso_1_code": null, "iso_3_code": "kfg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tulu", "iso_1_code": null, "iso_3_code": "tcy", + "children": [], "tokenizers": { "Knda": { - "full_object": "SpaCyTokenizer(\"kn\")", + "full_object": "IndicNLPTokenizer(\"kn\")", "original_lang_name": "kannada", "original_lang_code": "kan", - "scripts": [ - "Latn", - "Knda" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Knda", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "3699", + "native_tokenizers": [], "scripts": [ "Knda" - ], - "own_tokenizer": false + ] }, { "name": "Koraga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Koraga, Korra", "iso_1_code": null, "iso_3_code": "kfd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koraga, Mudu", "iso_1_code": null, "iso_3_code": "vmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3702", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Knda": { + "full_object": "IndicNLPTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "script": "Knda", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mala Malasar", "iso_1_code": null, "iso_3_code": "ima", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thachanadan", "iso_1_code": null, "iso_3_code": "thn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ullatan", "iso_1_code": null, "iso_3_code": "ull", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malasar", "iso_1_code": null, "iso_3_code": "ymr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Knda": { + "full_object": "IndicNLPTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "script": "Knda", + "class_name": "IndicNLPTokenizer" + }, + "Mlym": { + "full_object": "IndicNLPTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "script": "Mlym", + "class_name": "IndicNLPTokenizer" + }, + "Taml": { + "full_object": "IndicNLPTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "script": "Taml", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Allar", "iso_1_code": null, "iso_3_code": "all", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bharia", "iso_1_code": null, "iso_3_code": "bha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3710", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malankuravan", "iso_1_code": null, "iso_3_code": "mjo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3711", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pattapu", "iso_1_code": null, "iso_3_code": "ptq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3712", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vishavan", "iso_1_code": null, "iso_3_code": "vis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3708", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Telu": { + "full_object": "IndicNLPTokenizer(\"te\")", + "original_lang_name": "telugu", + "original_lang_code": "tel", + "script": "Telu", + "class_name": "IndicNLPTokenizer" + }, + "Knda": { + "full_object": "IndicNLPTokenizer(\"kn\")", + "original_lang_name": "kannada", + "original_lang_code": "kan", + "script": "Knda", + "class_name": "IndicNLPTokenizer" + }, + "Mlym": { + "full_object": "IndicNLPTokenizer(\"ml\")", + "original_lang_name": "malayalam", + "original_lang_code": "mal", + "script": "Mlym", + "class_name": "IndicNLPTokenizer" + }, + "Taml": { + "full_object": "IndicNLPTokenizer(\"ta\")", + "original_lang_name": "tamil", + "original_lang_code": "tam", + "script": "Taml", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3601", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git "a/data/East Bird\342\200\231s Head-Sentani.json" "b/data/East Bird\342\200\231s Head-Sentani.json" index e9654bd51e92afdc797c0256ea34ded19942d0e6..280aab322670d07da6a2c72292bc99e200c71fb5 100644 --- "a/data/East Bird\342\200\231s Head-Sentani.json" +++ "b/data/East Bird\342\200\231s Head-Sentani.json" @@ -2,173 +2,173 @@ "name": "East Bird\u2019s Head-Sentani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Burmeso", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Burmeso", "iso_1_code": null, "iso_3_code": "bzu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3715", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Bird\u2019s Head", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mantion", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sougb", "iso_1_code": null, "iso_3_code": "mnx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3719", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3718", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Meax", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Meyah", "iso_1_code": null, "iso_3_code": "mej", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3721", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Moskona", "iso_1_code": null, "iso_3_code": "mtj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3722", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3720", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sentani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Demta", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sowari", "iso_1_code": null, "iso_3_code": "dmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3725", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3724", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sentani Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nafri", "iso_1_code": null, "iso_3_code": "nxx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sentani", "iso_1_code": null, "iso_3_code": "set", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3728", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tabla", "iso_1_code": null, "iso_3_code": "tnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3726", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3723", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3714", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/East Geelvink Bay.json b/data/East Geelvink Bay.json index 1ddc32546c3e463991bb3c5096f697a765893bdd..ceac33343175b677d014e29c1434680a0174a299 100644 --- a/data/East Geelvink Bay.json +++ b/data/East Geelvink Bay.json @@ -2,143 +2,143 @@ "name": "East Geelvink Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anasi", "iso_1_code": null, "iso_3_code": "bpo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Barapasi", "iso_1_code": null, "iso_3_code": "brp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burate", "iso_1_code": null, "iso_3_code": "bti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kehu", "iso_1_code": null, "iso_3_code": "khh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kofei", "iso_1_code": null, "iso_3_code": "kpi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nisa", "iso_1_code": null, "iso_3_code": "njs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3736", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sauri", "iso_1_code": null, "iso_3_code": "srt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3737", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tefaro", "iso_1_code": null, "iso_3_code": "tfo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunggare", "iso_1_code": null, "iso_3_code": "trt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Woria", "iso_1_code": null, "iso_3_code": "wor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bauzi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bauzi", "iso_1_code": null, "iso_3_code": "bvz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3742", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Demisa", "iso_1_code": null, "iso_3_code": "dei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3743", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/East New Britain.json b/data/East New Britain.json index 03c7037618d0ec00f4a35f529df048f3d9cd9303..e5ff91418d465e56fdf7c255403bcc138d4e7a46 100644 --- a/data/East New Britain.json +++ b/data/East New Britain.json @@ -2,104 +2,104 @@ "name": "East New Britain", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baining", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Qaqet", "iso_1_code": null, "iso_3_code": "byx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3746", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kairak", "iso_1_code": null, "iso_3_code": "ckr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mali", "iso_1_code": null, "iso_3_code": "gcc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Simbali", "iso_1_code": null, "iso_3_code": "smg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ura", "iso_1_code": null, "iso_3_code": "uro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makolkol", "iso_1_code": null, "iso_3_code": "zmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taulil", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tulil", "iso_1_code": null, "iso_3_code": "tuh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3744", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Eastern Trans-Fly.json b/data/Eastern Trans-Fly.json index 5dde51991a5b0049df1b8b91efa8adaa8102cdc0..b8241944ee96dd6222a0118bc721b8cf406aeec6 100644 --- a/data/Eastern Trans-Fly.json +++ b/data/Eastern Trans-Fly.json @@ -2,54 +2,54 @@ "name": "Eastern Trans-Fly", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bine", "iso_1_code": null, "iso_3_code": "bon", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3755", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wipi", "iso_1_code": null, "iso_3_code": "gdr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3756", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gizrra", "iso_1_code": null, "iso_3_code": "tof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Meriam Mir", "iso_1_code": null, "iso_3_code": "ulk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3758", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Eskimo-Aleut.json b/data/Eskimo-Aleut.json index 56f5ad0284b4a55f5bae0cc36612fcdd7b6986bb..ed382da9be41656535f6ba2164db14c2119bd695 100644 --- a/data/Eskimo-Aleut.json +++ b/data/Eskimo-Aleut.json @@ -2,189 +2,189 @@ "name": "Eskimo-Aleut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aleut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aleut", "iso_1_code": null, "iso_3_code": "ale", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3761", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3760", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eskimo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Inuit-Inupiaq", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Inupiatun, North Alaskan", "iso_1_code": "ik", "iso_3_code": "esi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3764", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Inupiatun, Northwest Alaska", "iso_1_code": "ik", "iso_3_code": "esk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3765", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Inuktitut, Eastern Canadian", "iso_1_code": "iu", "iso_3_code": "ike", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3766", + "native_tokenizers": [], "scripts": [ "Cans" - ], - "own_tokenizer": false + ] }, { "name": "Inuinnaqtun", "iso_1_code": "iu", "iso_3_code": "ikt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3767", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Greenlandic", "iso_1_code": "kl", "iso_3_code": "kal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3768", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yupik", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yupik, Saint Lawrence Island", "iso_1_code": null, "iso_3_code": "ess", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3770", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yupik, Naukan", "iso_1_code": null, "iso_3_code": "ynk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3771", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yupik, Sirenik", "iso_1_code": null, "iso_3_code": "ysr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alaskan Yupik", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yupik, Pacific Gulf", "iso_1_code": null, "iso_3_code": "ems", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3774", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yupik, Central", "iso_1_code": null, "iso_3_code": "esu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3775", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3773", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3762", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3759", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Eyak-Athabaskan.json b/data/Eyak-Athabaskan.json index cdb5b25f480d5cd63fbf75b93541f5910f359d1c..ab9db9be76e613f0cd68ac30b08dfa12405c1151 100644 --- a/data/Eyak-Athabaskan.json +++ b/data/Eyak-Athabaskan.json @@ -2,648 +2,648 @@ "name": "Eyak-Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eyak", "iso_1_code": null, "iso_3_code": "eya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3777", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Apachean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Navajo", "iso_1_code": "nv", "iso_3_code": "nav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3780", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Apache", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Apache, Jicarilla", "iso_1_code": null, "iso_3_code": "apj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Apache, Kiowa", "iso_1_code": null, "iso_3_code": "apk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Apache, Lipan", "iso_1_code": null, "iso_3_code": "apl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3784", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Apache, Mescalero-Chiricahua", "iso_1_code": null, "iso_3_code": "apm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Apache, Western", "iso_1_code": null, "iso_3_code": "apw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3786", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3779", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ahtena", "iso_1_code": null, "iso_3_code": "aht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3788", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Babine", "iso_1_code": null, "iso_3_code": "bcr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Beaver", "iso_1_code": null, "iso_3_code": "bea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3790", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dene", "iso_1_code": null, "iso_3_code": "chp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chilcotin", "iso_1_code": null, "iso_3_code": "clc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3792", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tlicho", "iso_1_code": null, "iso_3_code": "dgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3793", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gwich\u2019in", "iso_1_code": null, "iso_3_code": "gwi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3794", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Han", "iso_1_code": null, "iso_3_code": "haa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Holikachuk", "iso_1_code": null, "iso_3_code": "hoi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3796", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Deg Xinag", "iso_1_code": null, "iso_3_code": "ing", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3797", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koyukon", "iso_1_code": null, "iso_3_code": "koy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuskokwim, Upper", "iso_1_code": null, "iso_3_code": "kuu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sekani", "iso_1_code": null, "iso_3_code": "sek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3800", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sarsi", "iso_1_code": null, "iso_3_code": "srs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanana, Lower", "iso_1_code": null, "iso_3_code": "taa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanana, Upper", "iso_1_code": null, "iso_3_code": "tau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanacross", "iso_1_code": null, "iso_3_code": "tcb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3804", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanaina", "iso_1_code": null, "iso_3_code": "tfn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsetsaut", "iso_1_code": null, "iso_3_code": "txc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3806", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Carrier", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Carrier, Southern", "iso_1_code": null, "iso_3_code": "caf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3808", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Carrier", "iso_1_code": null, "iso_3_code": "crx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3809", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3807", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Slavey-Hare", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Slavey, North", "iso_1_code": null, "iso_3_code": "scs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3811", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Slavey, South", "iso_1_code": null, "iso_3_code": "xsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tahltan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaska", "iso_1_code": null, "iso_3_code": "kkz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3814", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tagish", "iso_1_code": null, "iso_3_code": "tgx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tahltan", "iso_1_code": null, "iso_3_code": "tht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3816", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3813", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuchone", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tutchone, Southern", "iso_1_code": null, "iso_3_code": "tce", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tutchone, Northern", "iso_1_code": null, "iso_3_code": "ttm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3817", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pacific Coast Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwalhioqua-Tlatskanai", "iso_1_code": null, "iso_3_code": "qwt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "California Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hupa", "iso_1_code": null, "iso_3_code": "hup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kato", "iso_1_code": null, "iso_3_code": "ktw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mattole", "iso_1_code": null, "iso_3_code": "mvb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3825", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wailaki", "iso_1_code": null, "iso_3_code": "wlk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3822", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oregon Athabaskan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Galice", "iso_1_code": null, "iso_3_code": "gce", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Upper Umpqua", "iso_1_code": null, "iso_3_code": "xup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tolowa-Chetco", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chetco", "iso_1_code": null, "iso_3_code": "ctc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tolowa", "iso_1_code": null, "iso_3_code": "tol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3832", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tututni-Chasta Costa-Coquille", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Coquille", "iso_1_code": null, "iso_3_code": "coq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tututni", "iso_1_code": null, "iso_3_code": "tuu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3778", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tlingit", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tlingit", "iso_1_code": null, "iso_3_code": "tli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3836", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Fas.json b/data/Fas.json index d1e3afd11b986f365b5d4fb784809b33d392c65e..39a3732a5caa6a7bb7b81adbb4d297f643068f02 100644 --- a/data/Fas.json +++ b/data/Fas.json @@ -2,30 +2,30 @@ "name": "Fas", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baibai", "iso_1_code": null, "iso_3_code": "bbf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Momu", "iso_1_code": null, "iso_3_code": "fqs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3838", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Guajiboan.json b/data/Guajiboan.json index f079c78d771bbe17eb78863747020793008ee8d7..8b80d8cffcdcb278fa690e5f8abe98856c25cfef 100644 --- a/data/Guajiboan.json +++ b/data/Guajiboan.json @@ -2,77 +2,77 @@ "name": "Guajiboan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cuiba", "iso_1_code": null, "iso_3_code": "cui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3842", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guayabero", "iso_1_code": null, "iso_3_code": "guo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3843", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guajibo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Playero", "iso_1_code": null, "iso_3_code": "gob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guahibo", "iso_1_code": null, "iso_3_code": "guh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3846", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Macagu\u00e1n", "iso_1_code": null, "iso_3_code": "mbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3844", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3841", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Guaykuruan.json b/data/Guaykuruan.json index f197e498db2b4abf80e38247380b5a0a4a108984..c28cd75a192ee4c699bff7789d783721f410b765 100644 --- a/data/Guaykuruan.json +++ b/data/Guaykuruan.json @@ -2,90 +2,90 @@ "name": "Guaykuruan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Guaykur\u00fa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abipon", "iso_1_code": null, "iso_3_code": "axb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kadiw\u00e9u", "iso_1_code": null, "iso_3_code": "kbc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3851", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mocov\u00ed", "iso_1_code": null, "iso_3_code": "moc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3853", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pilag\u00e1", "iso_1_code": null, "iso_3_code": "plg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3854", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Toba", "iso_1_code": null, "iso_3_code": "tob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3855", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "3852", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Gum.json b/data/Gum.json index 6978c139a98e2404815b76ea649d1056221c6b04..ffff97f25c21c9bd5c19b297571af9533556f184 100644 --- a/data/Gum.json +++ b/data/Gum.json @@ -2,9 +2,9 @@ "name": "Gum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Haida.json b/data/Haida.json index f2055ec455656050da85d82d5cf0c6fefa4f6041..8fc9c8927b2dbbea17e605afa3aae512e6fc606a 100644 --- a/data/Haida.json +++ b/data/Haida.json @@ -2,30 +2,30 @@ "name": "Haida", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Haida, Southern", "iso_1_code": null, "iso_3_code": "hax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3858", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haida, Northern", "iso_1_code": null, "iso_3_code": "hdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3859", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3857", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git "a/data/Har\303\241kmbut.json" "b/data/Har\303\241kmbut.json" index 3835a5367ea7e3f78944531fead1f0145fbb5204..c251545926c32a28faa3881b144d127cce281440 100644 --- "a/data/Har\303\241kmbut.json" +++ "b/data/Har\303\241kmbut.json" @@ -2,32 +2,32 @@ "name": "Har\u00e1kmbut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amarakaeri", "iso_1_code": null, "iso_3_code": "amr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3861", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Huachipaeri", "iso_1_code": null, "iso_3_code": "hug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3860", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Hmong-Mien.json b/data/Hmong-Mien.json index 04d2cb2a28e603fae657bb1bc31a3b784ff42391..28b10277b538f4dda0314dfc6e94230b08263a3b 100644 --- a/data/Hmong-Mien.json +++ b/data/Hmong-Mien.json @@ -2,527 +2,527 @@ "name": "Hmong-Mien", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hmongic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bunu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bunu, Younuo", "iso_1_code": null, "iso_3_code": "buh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3866", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bunu, Wunai", "iso_1_code": null, "iso_3_code": "bwn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bunu, Bu-Nao", "iso_1_code": null, "iso_3_code": "bwx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bunu, Jiongnai", "iso_1_code": null, "iso_3_code": "pnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3865", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chuanqiandian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Miao, Chuanqiandian Cluster", "iso_1_code": null, "iso_3_code": "cqd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3871", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Southern Mashan", "iso_1_code": null, "iso_3_code": "hma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3872", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Central Huishui", "iso_1_code": null, "iso_3_code": "hmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3873", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Large Flowery", "iso_1_code": null, "iso_3_code": "hmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Eastern Huishui", "iso_1_code": null, "iso_3_code": "hme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3875", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hmong Don", "iso_1_code": null, "iso_3_code": "hmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Southwestern Guiyang", "iso_1_code": null, "iso_3_code": "hmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3877", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Southwestern Huishui", "iso_1_code": null, "iso_3_code": "hmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3878", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Northern Huishui", "iso_1_code": null, "iso_3_code": "hmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3879", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ge", "iso_1_code": null, "iso_3_code": "hmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3880", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Luopohe", "iso_1_code": null, "iso_3_code": "hml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3881", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Central Mashan", "iso_1_code": null, "iso_3_code": "hmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3882", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Northern Mashan", "iso_1_code": null, "iso_3_code": "hmp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3883", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hmong D\u00f4", "iso_1_code": null, "iso_3_code": "hmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3884", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Western Mashan", "iso_1_code": null, "iso_3_code": "hmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3885", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Southern Guiyang", "iso_1_code": null, "iso_3_code": "hmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3886", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sinicized Miao", "iso_1_code": null, "iso_3_code": "hmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3887", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hmong Njua", "iso_1_code": null, "iso_3_code": "hnj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3888", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Miao, Horned", "iso_1_code": null, "iso_3_code": "hrm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3889", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Northern Guiyang", "iso_1_code": null, "iso_3_code": "huj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3890", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hmong Daw", "iso_1_code": null, "iso_3_code": "mww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3891", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Miao, Small Flowery", "iso_1_code": null, "iso_3_code": "sfm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3892", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pa-hng", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pa-Hng", "iso_1_code": null, "iso_3_code": "pha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3894", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3893", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Qiandong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Miao, Northern Qiandong", "iso_1_code": null, "iso_3_code": "hea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3896", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Eastern Qiandong", "iso_1_code": null, "iso_3_code": "hmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3897", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Southern Qiandong", "iso_1_code": null, "iso_3_code": "hms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "N\u00e1-Meo", "iso_1_code": null, "iso_3_code": "neo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3895", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xiangxi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Miao, Western Xiangxi", "iso_1_code": null, "iso_3_code": "mmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3901", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miao, Eastern Xiangxi", "iso_1_code": null, "iso_3_code": "muq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3902", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ho Nte", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "She", "iso_1_code": null, "iso_3_code": "shx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3904", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3903", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mienic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Biao-Jiao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Biao-Jiao Mien", "iso_1_code": null, "iso_3_code": "bje", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mian-Jin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Biao Mon", "iso_1_code": null, "iso_3_code": "bmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3909", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iu Mien", "iso_1_code": null, "iso_3_code": "ium", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3910", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kim Mun", "iso_1_code": null, "iso_3_code": "mji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3911", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3908", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zaomin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dzao Min", "iso_1_code": null, "iso_3_code": "bpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3912", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3905", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Huavean.json b/data/Huavean.json index c368d20c4a8f1f036e2f850b5d872af30af43bec..092796bc88ab712db5e10d1f11671ebd146d8a3b 100644 --- a/data/Huavean.json +++ b/data/Huavean.json @@ -2,52 +2,52 @@ "name": "Huavean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Huave, San Francisco del Mar", "iso_1_code": null, "iso_3_code": "hue", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Huave, San Mateo del Mar", "iso_1_code": null, "iso_3_code": "huv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3916", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Huave, San Dionisio del Mar", "iso_1_code": null, "iso_3_code": "hve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Huave, Santa Mar\u00eda del Mar", "iso_1_code": null, "iso_3_code": "hvv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3914", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Indo-European.json b/data/Indo-European.json index 49d0ab069a1818943756ca560b6c3a3b093430fb..479c29f9c971a63d890954cf38b4d091c78f685d 100644 --- a/data/Indo-European.json +++ b/data/Indo-European.json @@ -2,11252 +2,9644 @@ "name": "Indo-European", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"en\")", - "original_lang_name": "english", - "original_lang_code": "eng", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Armn": { - "full_object": "SpaCyTokenizer(\"hy\")", - "original_lang_name": "armenian", - "original_lang_code": "hye", - "scripts": [ - "Armn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"ru\")", - "original_lang_name": "russian", - "original_lang_code": "rus", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Grek": { - "full_object": "SpaCyTokenizer(\"el\")", - "original_lang_name": "greek", - "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Guru": { - "full_object": "IndicNLPTokenizer(\"pa\")", - "original_lang_name": "punjabi", - "original_lang_code": "pan", - "scripts": [ - "Latn", - "Guru" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, - "Beng": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Orya": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Sinh": { - "full_object": "SpaCyTokenizer(\"si\")", - "original_lang_name": "sinhala", - "original_lang_code": "sin", - "scripts": [ - "Sinh" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Albanian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sq\")", - "original_lang_name": "albanian", - "original_lang_code": "sqi", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gheg", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sq\")", - "original_lang_name": "albanian", - "original_lang_code": "sqi", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Albanian, Gheg", "iso_1_code": "sq", "iso_3_code": "aln", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"sq\")", - "original_lang_name": "albanian", - "original_lang_code": "sqi", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3922", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tosk", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sq\")", - "original_lang_name": "albanian", - "original_lang_code": "sqi", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Albanian, Arb\u00ebresh\u00eb", "iso_1_code": "sq", "iso_3_code": "aae", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sq\")", - "original_lang_name": "albanian", - "original_lang_code": "sqi", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "3924", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Albanian, Arvanitika", "iso_1_code": "sq", "iso_3_code": "aat", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sq\")", - "original_lang_name": "albanian", - "original_lang_code": "sqi", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "3925", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Albanian, Tosk", "iso_1_code": "sq", "iso_3_code": "als", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"sq\")", - "original_lang_name": "albanian", - "original_lang_code": "sqi", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3926", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Armenian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Armn": { - "full_object": "SpaCyTokenizer(\"hy\")", - "original_lang_name": "armenian", - "original_lang_code": "hye", - "scripts": [ - "Armn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Armenian", "iso_1_code": "hy", "iso_3_code": "hye", + "children": [], "tokenizers": { "Armn": { "full_object": "SpaCyTokenizer(\"hy\")", "original_lang_name": "armenian", "original_lang_code": "hye", - "scripts": [ - "Armn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Armn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3928", - "scripts": [ + "native_tokenizers": [ "Armn" ], - "own_tokenizer": true + "scripts": [ + "Armn" + ] }, { "name": "Armenian, Western", "iso_1_code": null, "iso_3_code": "hyw", + "children": [], "tokenizers": { "Armn": { "full_object": "StanzaTokenizer(\"hyw\")", "original_lang_name": "western_armenian", "original_lang_code": "hyw", - "scripts": [ - "Armn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Armn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3929", - "scripts": [ + "native_tokenizers": [ "Armn" ], - "own_tokenizer": true + "scripts": [ + "Armn" + ] } ], + "tokenizers": { + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "script": "Armn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Balto-Slavic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"pl\")", - "original_lang_name": "polish", - "original_lang_code": "pol", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"ru\")", - "original_lang_name": "russian", - "original_lang_code": "rus", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Baltic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lt\")", - "original_lang_name": "lithuanian", - "original_lang_code": "lit", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lt\")", - "original_lang_name": "lithuanian", - "original_lang_code": "lit", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lithuanian", "iso_1_code": "lt", "iso_3_code": "lit", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lt\")", "original_lang_name": "lithuanian", "original_lang_code": "lit", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3933", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Latgalian", "iso_1_code": "lv", "iso_3_code": "ltg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"lv\")", - "original_lang_name": "latvian", - "original_lang_code": "lav", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"lt\")", + "original_lang_name": "lithuanian", + "original_lang_code": "lit", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3934", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Latvian, Standard", "iso_1_code": "lv", "iso_3_code": "lvs", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"lv\")", - "original_lang_name": "latvian", - "original_lang_code": "lav", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"lt\")", + "original_lang_name": "lithuanian", + "original_lang_code": "lit", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3935", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Samogitian", "iso_1_code": null, "iso_3_code": "sgs", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lt\")", "original_lang_name": "lithuanian", "original_lang_code": "lit", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3936", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zemgalian", "iso_1_code": null, "iso_3_code": "xzm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3937", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "3932", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Western", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lt\")", "original_lang_name": "lithuanian", "original_lang_code": "lit", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "3932", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Western", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Prussian", "iso_1_code": null, "iso_3_code": "prg", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lt\")", "original_lang_name": "lithuanian", "original_lang_code": "lit", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3939", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sudovian", "iso_1_code": null, "iso_3_code": "xsv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lt\")", + "original_lang_name": "lithuanian", + "original_lang_code": "lit", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3938", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lt\")", + "original_lang_name": "lithuanian", + "original_lang_code": "lit", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Slavic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "SpaCyTokenizer(\"ru\")", - "original_lang_name": "russian", - "original_lang_code": "rus", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"pl\")", - "original_lang_name": "polish", - "original_lang_code": "pol", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "SpaCyTokenizer(\"ru\")", - "original_lang_name": "russian", - "original_lang_code": "rus", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Belarusian", "iso_1_code": "be", "iso_3_code": "bel", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"be\")", "original_lang_name": "belarusian", "original_lang_code": "bel", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3943", - "scripts": [ + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl" + ] }, { "name": "Ruthenian", "iso_1_code": null, "iso_3_code": "rsk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rusyn", "iso_1_code": null, "iso_3_code": "rue", + "children": [], "tokenizers": { "Cyrl": { "full_object": "SpaCyTokenizer(\"ru\")", "original_lang_name": "russian", "original_lang_code": "rus", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3945", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Russian", "iso_1_code": "ru", "iso_3_code": "rus", + "children": [], "tokenizers": { "Cyrl": { "full_object": "SpaCyTokenizer(\"ru\")", "original_lang_name": "russian", "original_lang_code": "rus", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3946", - "scripts": [ + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl" + ] }, { "name": "Ukrainian", "iso_1_code": "uk", "iso_3_code": "ukr", + "children": [], "tokenizers": { "Cyrl": { "full_object": "SpaCyTokenizer(\"uk\")", "original_lang_name": "ukrainian", "original_lang_code": "ukr", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3947", - "scripts": [ + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl" + ] } ], + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "SpaCyTokenizer(\"bg\")", - "original_lang_name": "bulgarian", - "original_lang_code": "bul", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"sr\")", - "original_lang_name": "serbocroatian", - "original_lang_code": "hbs", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "SpaCyTokenizer(\"bg\")", - "original_lang_name": "bulgarian", - "original_lang_code": "bul", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bulgarian", "iso_1_code": "bg", "iso_3_code": "bul", + "children": [], "tokenizers": { "Cyrl": { "full_object": "SpaCyTokenizer(\"bg\")", "original_lang_name": "bulgarian", "original_lang_code": "bul", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3950", - "scripts": [ + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl" + ] }, { "name": "Slavonic, Church", "iso_1_code": "cu", "iso_3_code": "chu", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"cu\")", - "original_lang_name": "old_bulgarian", + "original_lang_name": "church_slavonic", "original_lang_code": "chu", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3951", - "scripts": [ + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl" + ] }, { "name": "Macedonian", "iso_1_code": "mk", "iso_3_code": "mkd", + "children": [], "tokenizers": { "Cyrl": { "full_object": "SpaCyTokenizer(\"mk\")", "original_lang_name": "macedonian", "original_lang_code": "mkd", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3952", - "scripts": [ + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl" + ] } ], + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"bg\")", + "original_lang_name": "bulgarian", + "original_lang_code": "bul", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sr\")", - "original_lang_name": "serbocroatian", - "original_lang_code": "hbs", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"sr\")", - "original_lang_name": "serbian", - "original_lang_code": "srp", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bosnian", "iso_1_code": "bs", "iso_3_code": "bos", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"sr\")", - "original_lang_name": "serbocroatian", - "original_lang_code": "hbs", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"hr\")", + "original_lang_name": "croatian", + "original_lang_code": "hrv", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3954", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Chakavian", "iso_1_code": null, "iso_3_code": "ckm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"sr\")", - "original_lang_name": "serbocroatian", - "original_lang_code": "hbs", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"hr\")", + "original_lang_name": "croatian", + "original_lang_code": "hrv", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3955", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Montenegrin", "iso_1_code": "sh", "iso_3_code": "cnr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"sr\")", - "original_lang_name": "serbocroatian", - "original_lang_code": "hbs", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"hr\")", + "original_lang_name": "croatian", + "original_lang_code": "hrv", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3956", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Croatian", "iso_1_code": "hr", "iso_3_code": "hrv", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"hr\")", "original_lang_name": "croatian", "original_lang_code": "hrv", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3957", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Slovene", "iso_1_code": "sl", "iso_3_code": "slv", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"sl\")", "original_lang_name": "slovenian", "original_lang_code": "slv", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3958", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Serbian", "iso_1_code": "sr", "iso_3_code": "srp", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sr\")", - "original_lang_name": "serbian", - "original_lang_code": "srp", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Cyrl": { "full_object": "SpaCyTokenizer(\"sr\")", "original_lang_name": "serbian", "original_lang_code": "srp", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"hr\")", + "original_lang_name": "croatian", + "original_lang_code": "hrv", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3959", - "scripts": [ - "Latn", + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl", + "Latn" + ] }, { "name": "Slavomolisano", "iso_1_code": null, "iso_3_code": "svm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hr\")", + "original_lang_name": "croatian", + "original_lang_code": "hrv", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"sr\")", + "original_lang_name": "serbian", + "original_lang_code": "srp", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3953", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"bg\")", + "original_lang_name": "bulgarian", + "original_lang_code": "bul", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"hr\")", + "original_lang_name": "croatian", + "original_lang_code": "hrv", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"pl\")", - "original_lang_name": "polish", - "original_lang_code": "pol", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Czech-Slovak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"cs\")", - "original_lang_name": "czech", - "original_lang_code": "ces", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Czech", "iso_1_code": "cs", "iso_3_code": "ces", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"cs\")", "original_lang_name": "czech", "original_lang_code": "ces", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3963", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Knaanic", "iso_1_code": null, "iso_3_code": "czk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3964", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Slovak", "iso_1_code": "sk", "iso_3_code": "slk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"sk\")", "original_lang_name": "slovak", "original_lang_code": "slk", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3965", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"cs\")", + "original_lang_name": "czech", + "original_lang_code": "ces", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3962", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lechitic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"pl\")", - "original_lang_name": "polish", - "original_lang_code": "pol", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kashubian", "iso_1_code": null, "iso_3_code": "csb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"pl\")", "original_lang_name": "polish", "original_lang_code": "pol", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3967", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Polish", "iso_1_code": "pl", "iso_3_code": "pol", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"pl\")", "original_lang_name": "polish", "original_lang_code": "pol", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3968", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Polabian", "iso_1_code": null, "iso_3_code": "pox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Silesian", "iso_1_code": null, "iso_3_code": "szl", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"pl\")", "original_lang_name": "polish", "original_lang_code": "pol", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3970", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3966", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sorbian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"hsb\")", - "original_lang_name": "upper_sorbian", - "original_lang_code": "hsb", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Sorbian, Lower", "iso_1_code": null, "iso_3_code": "dsb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"dsb\")", "original_lang_name": "lower_sorbian", "original_lang_code": "dsb", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3972", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Sorbian, Upper", "iso_1_code": null, "iso_3_code": "hsb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"hsb\")", "original_lang_name": "upper_sorbian", "original_lang_code": "hsb", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3973", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hsb\")", + "original_lang_name": "upper_sorbian", + "original_lang_code": "hsb", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3971", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3961", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pl\")", + "original_lang_name": "polish", + "original_lang_code": "pol", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3930", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Celtic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"cy\")", - "original_lang_name": "welsh", - "original_lang_code": "cym", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Insular", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"cy\")", - "original_lang_name": "welsh", - "original_lang_code": "cym", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Brythonic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"cy\")", - "original_lang_name": "welsh", - "original_lang_code": "cym", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Breton", "iso_1_code": "br", "iso_3_code": "bre", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"cy\")", "original_lang_name": "welsh", "original_lang_code": "cym", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3977", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cornish", "iso_1_code": "kw", "iso_3_code": "cor", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"cy\")", "original_lang_name": "welsh", "original_lang_code": "cym", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3978", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Welsh", "iso_1_code": "cy", "iso_3_code": "cym", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"cy\")", "original_lang_name": "welsh", "original_lang_code": "cym", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3979", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"cy\")", + "original_lang_name": "welsh", + "original_lang_code": "cym", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3976", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Goidelic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ga\")", - "original_lang_name": "irish", - "original_lang_code": "gle", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Scottish Gaelic", "iso_1_code": "gd", "iso_3_code": "gla", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"gd\")", "original_lang_name": "gaelic", "original_lang_code": "gla", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3981", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Irish", "iso_1_code": "ga", "iso_3_code": "gle", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ga\")", "original_lang_name": "irish", "original_lang_code": "gle", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3982", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Manx", "iso_1_code": "gv", "iso_3_code": "glv", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"gv\")", "original_lang_name": "manx", "original_lang_code": "glv", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "3983", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ga\")", + "original_lang_name": "irish", + "original_lang_code": "gle", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3980", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"cy\")", + "original_lang_name": "welsh", + "original_lang_code": "cym", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3975", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"cy\")", + "original_lang_name": "welsh", + "original_lang_code": "cym", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "3974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Germanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"en\")", - "original_lang_name": "english", - "original_lang_code": "eng", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sv\")", - "original_lang_name": "swedish", - "original_lang_code": "swe", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "East Scandinavian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sv\")", - "original_lang_name": "swedish", - "original_lang_code": "swe", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "\u00d6vdalian", "iso_1_code": null, "iso_3_code": "ovd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3987", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Danish-Swedish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sv\")", - "original_lang_name": "swedish", - "original_lang_code": "swe", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Danish-Bokmal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Norwegian", "iso_1_code": "no", "iso_3_code": "nor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3990", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Danish-Riksmal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"da\")", - "original_lang_name": "danish", - "original_lang_code": "dan", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Danish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"da\")", - "original_lang_name": "danish", - "original_lang_code": "dan", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Danish", "iso_1_code": "da", "iso_3_code": "dan", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"da\")", "original_lang_name": "danish", "original_lang_code": "dan", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3993", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"da\")", + "original_lang_name": "danish", + "original_lang_code": "dan", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"da\")", + "original_lang_name": "danish", + "original_lang_code": "dan", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3991", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Swedish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sv\")", - "original_lang_name": "swedish", - "original_lang_code": "swe", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Swedish", "iso_1_code": "sv", "iso_3_code": "swe", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"sv\")", "original_lang_name": "swedish", "original_lang_code": "swe", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3995", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sv\")", + "original_lang_name": "swedish", + "original_lang_code": "swe", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sv\")", + "original_lang_name": "swedish", + "original_lang_code": "swe", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3988", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sv\")", + "original_lang_name": "swedish", + "original_lang_code": "swe", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3986", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Scandinavian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"is\")", - "original_lang_name": "icelandic", - "original_lang_code": "isl", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Faroese", "iso_1_code": "fo", "iso_3_code": "fao", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fo\")", "original_lang_name": "faroese", "original_lang_code": "fao", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3997", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Icelandic", "iso_1_code": "is", "iso_3_code": "isl", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"is\")", "original_lang_name": "icelandic", "original_lang_code": "isl", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3998", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Norn", "iso_1_code": null, "iso_3_code": "nrn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3999", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"is\")", + "original_lang_name": "icelandic", + "original_lang_code": "isl", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3996", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"sv\")", + "original_lang_name": "swedish", + "original_lang_code": "swe", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3985", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"en\")", - "original_lang_name": "english", - "original_lang_code": "eng", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "English", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"en\")", - "original_lang_name": "english", - "original_lang_code": "eng", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "English", "iso_1_code": "en", "iso_3_code": "eng", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"en\")", "original_lang_name": "english", "original_lang_code": "eng", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4002", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Scots", "iso_1_code": null, "iso_3_code": "sco", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"en\")", "original_lang_name": "english", "original_lang_code": "eng", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4003", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yola", "iso_1_code": null, "iso_3_code": "yol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4001", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Frisian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"en\")", "original_lang_name": "english", "original_lang_code": "eng", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4001", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Frisian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Frisian, Northern", "iso_1_code": null, "iso_3_code": "frr", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"en\")", "original_lang_name": "english", "original_lang_code": "eng", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4006", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Frisian", "iso_1_code": "fy", "iso_3_code": "fry", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"en\")", "original_lang_name": "english", "original_lang_code": "eng", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4007", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Saterfriesisch", "iso_1_code": null, "iso_3_code": "stq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"en\")", "original_lang_name": "english", "original_lang_code": "eng", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4008", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4005", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "High German", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"de\")", - "original_lang_name": "german", - "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "German", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"de\")", - "original_lang_name": "german", - "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Hunsrik", "iso_1_code": null, "iso_3_code": "hrx", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"de\")", "original_lang_name": "german", "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4011", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Middle German", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"de\")", - "original_lang_name": "german", - "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "East Middle German", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"de\")", - "original_lang_name": "german", - "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "German, Standard", "iso_1_code": "de", "iso_3_code": "deu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"de\")", "original_lang_name": "german", "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4014", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Silesian, Lower", "iso_1_code": null, "iso_3_code": "sli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saxon, Upper", "iso_1_code": null, "iso_3_code": "sxu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4016", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wymysorys", "iso_1_code": null, "iso_3_code": "wym", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4013", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Middle German", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lb\")", - "original_lang_name": "luxembourgish", - "original_lang_code": "ltz", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ripuarian", "iso_1_code": null, "iso_3_code": "ksh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lb\")", "original_lang_name": "luxembourgish", "original_lang_code": "ltz", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4019", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "German, Pennsylvania", "iso_1_code": null, "iso_3_code": "pdc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lb\")", "original_lang_name": "luxembourgish", "original_lang_code": "ltz", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4020", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Palatinate Franconian", "iso_1_code": null, "iso_3_code": "pfl", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lb\")", "original_lang_name": "luxembourgish", "original_lang_code": "ltz", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4021", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Moselle Franconian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lb\")", - "original_lang_name": "luxembourgish", - "original_lang_code": "ltz", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Luxembourgish", "iso_1_code": "lb", "iso_3_code": "ltz", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lb\")", "original_lang_name": "luxembourgish", "original_lang_code": "ltz", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4023", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lb\")", + "original_lang_name": "luxembourgish", + "original_lang_code": "ltz", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4022", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lb\")", + "original_lang_name": "luxembourgish", + "original_lang_code": "ltz", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4012", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Upper German", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"de\")", "original_lang_name": "german", "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4012", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Upper German", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Eastern Franconian", "iso_1_code": null, "iso_3_code": "vmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4025", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alemannic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"de\")", - "original_lang_name": "german", - "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "German, Colonia Tovar", "iso_1_code": null, "iso_3_code": "gct", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4027", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "German, Swiss", "iso_1_code": null, "iso_3_code": "gsw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"de\")", "original_lang_name": "german", "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4028", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Swabian", "iso_1_code": null, "iso_3_code": "swg", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"de\")", "original_lang_name": "german", "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4029", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Walser", "iso_1_code": null, "iso_3_code": "wae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4030", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4026", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Bavarian-Austrian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"de\")", "original_lang_name": "german", "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4026", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Bavarian-Austrian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bavarian", "iso_1_code": null, "iso_3_code": "bar", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"de\")", "original_lang_name": "german", "original_lang_code": "deu", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4032", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cimbrian", "iso_1_code": null, "iso_3_code": "cim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4033", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hutterisch", "iso_1_code": null, "iso_3_code": "geh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4034", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "M\u00f2cheno", "iso_1_code": null, "iso_3_code": "mhn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4024", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yiddish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yiddish, Eastern", "iso_1_code": "yi", "iso_3_code": "ydd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4037", + "native_tokenizers": [], "scripts": [ "Hebr" - ], - "own_tokenizer": false + ] }, { "name": "Yiddish, Western", "iso_1_code": "yi", "iso_3_code": "yih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"de\")", + "original_lang_name": "german", + "original_lang_code": "deu", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Low Saxon-Low Franconian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"nl\")", - "original_lang_name": "dutch", - "original_lang_code": "nld", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Low Franconian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"nl\")", - "original_lang_name": "dutch", - "original_lang_code": "nld", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Afrikaans", "iso_1_code": "af", "iso_3_code": "afr", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"af\")", "original_lang_name": "afrikaans", "original_lang_code": "afr", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4041", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Limburgish", "iso_1_code": "li", "iso_3_code": "lim", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"nl\")", "original_lang_name": "dutch", "original_lang_code": "nld", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4042", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dutch", "iso_1_code": "nl", "iso_3_code": "nld", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"nl\")", "original_lang_name": "dutch", "original_lang_code": "nld", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4043", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "West Flemish", "iso_1_code": null, "iso_3_code": "vls", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"nl\")", "original_lang_name": "dutch", "original_lang_code": "nld", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4044", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zeeuws", "iso_1_code": null, "iso_3_code": "zea", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"nl\")", "original_lang_name": "dutch", "original_lang_code": "nld", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4045", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "4040", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Low Saxon", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"nl\")", "original_lang_name": "dutch", "original_lang_code": "nld", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4040", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Low Saxon", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Achterhoeks", "iso_1_code": null, "iso_3_code": "act", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4047", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Drents", "iso_1_code": null, "iso_3_code": "drt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4048", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saxon, East Frisian Low", "iso_1_code": null, "iso_3_code": "frs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4049", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gronings", "iso_1_code": null, "iso_3_code": "gos", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"nl\")", "original_lang_name": "dutch", "original_lang_code": "nld", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4050", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Saxon, Low", "iso_1_code": null, "iso_3_code": "nds", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"nl\")", "original_lang_name": "dutch", "original_lang_code": "nld", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4051", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Plautdietsch", "iso_1_code": null, "iso_3_code": "pdt", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"nl\")", "original_lang_name": "dutch", "original_lang_code": "nld", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4052", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sallands", "iso_1_code": null, "iso_3_code": "sdz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Stellingwerfs", "iso_1_code": null, "iso_3_code": "stl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Twents", "iso_1_code": null, "iso_3_code": "twd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Veluws", "iso_1_code": null, "iso_3_code": "vel", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4056", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Westphalien", "iso_1_code": null, "iso_3_code": "wep", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4057", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"nl\")", + "original_lang_name": "dutch", + "original_lang_code": "nld", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Greek", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Grek": { - "full_object": "SpaCyTokenizer(\"el\")", - "original_lang_name": "greek", - "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Attic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Grek": { - "full_object": "SpaCyTokenizer(\"el\")", - "original_lang_name": "greek", - "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Cappadocian Greek", "iso_1_code": null, "iso_3_code": "cpg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Greek", "iso_1_code": "el", "iso_3_code": "ell", + "children": [], "tokenizers": { "Grek": { "full_object": "SpaCyTokenizer(\"el\")", "original_lang_name": "greek", "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Grek", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4061", - "scripts": [ + "native_tokenizers": [ "Grek" ], - "own_tokenizer": true + "scripts": [ + "Grek" + ] }, { "name": "Greek, Ancient", "iso_1_code": null, "iso_3_code": "grc", + "children": [], "tokenizers": { "Grek": { "full_object": "SpaCyTokenizer(\"grc\")", "original_lang_name": "ancient_greek", "original_lang_code": "grc", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Grek", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4062", - "scripts": [ + "native_tokenizers": [ "Grek" ], - "own_tokenizer": true + "scripts": [ + "Grek" + ] }, { "name": "Pontic", "iso_1_code": null, "iso_3_code": "pnt", + "children": [], "tokenizers": { "Grek": { "full_object": "SpaCyTokenizer(\"el\")", "original_lang_name": "greek", "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Grek", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4063", + "native_tokenizers": [], "scripts": [ "Grek" - ], - "own_tokenizer": false + ] }, { "name": "Yevanic", "iso_1_code": null, "iso_3_code": "yej", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4064", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "script": "Grek", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4059", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Doric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tsakonian", "iso_1_code": null, "iso_3_code": "tsd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4066", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4065", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4058", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Indo-Iranian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Guru": { - "full_object": "IndicNLPTokenizer(\"pa\")", - "original_lang_name": "punjabi", - "original_lang_code": "pan", - "scripts": [ - "Latn", - "Guru" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, - "Beng": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Orya": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Sinh": { - "full_object": "SpaCyTokenizer(\"si\")", - "original_lang_name": "sinhala", - "original_lang_code": "sin", - "scripts": [ - "Sinh" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, "Grek": { "full_object": "SpaCyTokenizer(\"el\")", "original_lang_name": "greek", "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Grek", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4058", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Indo-Iranian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Indo-Aryan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", + "children": [ + { + "name": "Sanskrit", + "iso_1_code": "sa", + "iso_3_code": "san", + "children": [], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"sa\")", + "original_lang_name": "sanskrit", + "original_lang_code": "san", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, + "node_i": "4069", + "native_tokenizers": [ "Deva" ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", "scripts": [ "Latn", "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Guru": { - "full_object": "IndicNLPTokenizer(\"pa\")", - "original_lang_name": "punjabi", - "original_lang_code": "pan", - "scripts": [ - "Latn", - "Guru" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, - "Beng": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Orya": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ur\")", - "original_lang_name": "urdu", - "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Sinh": { - "full_object": "SpaCyTokenizer(\"si\")", - "original_lang_name": "sinhala", - "original_lang_code": "sin", - "scripts": [ - "Sinh" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Grek": { - "full_object": "SpaCyTokenizer(\"el\")", - "original_lang_name": "greek", - "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, - "children": [ - { - "name": "Sanskrit", - "iso_1_code": "sa", - "iso_3_code": "san", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"sa\")", - "original_lang_name": "sanskrit", - "original_lang_code": "san", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"sa\")", - "original_lang_name": "sanskrit", - "original_lang_code": "san", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, - "children": [], - "node_i": "4069", - "scripts": [ - "Deva", - "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Intermediate Divisions", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Guru": { - "full_object": "IndicNLPTokenizer(\"pa\")", - "original_lang_name": "punjabi", - "original_lang_code": "pan", - "scripts": [ - "Latn", - "Guru" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ur\")", - "original_lang_name": "urdu", - "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Grek": { - "full_object": "SpaCyTokenizer(\"el\")", - "original_lang_name": "greek", - "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Awadhi", "iso_1_code": null, "iso_3_code": "awa", + "children": [], "tokenizers": { "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4073", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Bagheli", "iso_1_code": null, "iso_3_code": "bfy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4074", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fiji Hindi", "iso_1_code": null, "iso_3_code": "hif", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4075", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chhattisgarhi", "iso_1_code": null, "iso_3_code": "hne", + "children": [], "tokenizers": { "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4076", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Kamar", "iso_1_code": null, "iso_3_code": "keq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4077", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Surgujia", "iso_1_code": null, "iso_3_code": "sgj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4078", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4072", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Pahari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dotyali", "iso_1_code": "ne", "iso_3_code": "dty", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4080", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": true + ] }, { "name": "Jumli", "iso_1_code": null, "iso_3_code": "jml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4081", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nepali", "iso_1_code": "ne", "iso_3_code": "npi", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4082", + "native_tokenizers": [], "scripts": [ "Latn", "Deva" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4071", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Western", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Guru": { - "full_object": "IndicNLPTokenizer(\"pa\")", - "original_lang_name": "punjabi", - "original_lang_code": "pan", - "scripts": [ - "Latn", - "Guru" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ur\")", - "original_lang_name": "urdu", - "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" }, - "Cyrl": { + "Latn": { "full_object": "StanzaTokenizer(\"kmr\")", "original_lang_name": "northern_kurdish", "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Grek": { - "full_object": "SpaCyTokenizer(\"el\")", - "original_lang_name": "greek", - "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "4071", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Western", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dawoodi", "iso_1_code": null, "iso_3_code": "dmk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Parya", "iso_1_code": null, "iso_3_code": "paq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4085", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Powari", "iso_1_code": null, "iso_3_code": "pwr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4086", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bhil", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bareli, Pauri", "iso_1_code": null, "iso_3_code": "bfb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4088", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bareli, Rathwi", "iso_1_code": null, "iso_3_code": "bgd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4089", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bauria", "iso_1_code": null, "iso_3_code": "bge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bhili", "iso_1_code": null, "iso_3_code": "bhb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4091", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bhilali", "iso_1_code": null, "iso_3_code": "bhi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4092", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bareli, Palya", "iso_1_code": null, "iso_3_code": "bpx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4093", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chodri", "iso_1_code": null, "iso_3_code": "cdi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dhodia", "iso_1_code": null, "iso_3_code": "dho", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4095", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dubli", "iso_1_code": null, "iso_3_code": "dub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dungra Bhil", "iso_1_code": null, "iso_3_code": "duh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4097", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Garasia, Adiwasi", "iso_1_code": null, "iso_3_code": "gas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4098", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gamit", "iso_1_code": null, "iso_3_code": "gbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4099", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Garasia, Rajput", "iso_1_code": null, "iso_3_code": "gra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mawchi", "iso_1_code": null, "iso_3_code": "mke", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahali", "iso_1_code": null, "iso_3_code": "nlx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4102", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Noiri", "iso_1_code": null, "iso_3_code": "noi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4103", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pardhi", "iso_1_code": null, "iso_3_code": "pcl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rathawi", "iso_1_code": null, "iso_3_code": "rtw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wagdi", "iso_1_code": null, "iso_3_code": "wbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4087", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Domari", "iso_1_code": null, "iso_3_code": "rmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4108", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gujarati", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Aer", "iso_1_code": null, "iso_3_code": "aeq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koli, Kachi", "iso_1_code": null, "iso_3_code": "gjk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4111", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gujarati", "iso_1_code": "gu", "iso_3_code": "guj", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", + "full_object": "IndicNLPTokenizer(\"gu\")", "original_lang_name": "gujarati", "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Gujr", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4112", + "native_tokenizers": [ + "Gujr" + ], "scripts": [ "Gujr", "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Jandavra", "iso_1_code": null, "iso_3_code": "jnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4113", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koli, Parkari", "iso_1_code": null, "iso_3_code": "kvx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koli, Wadiyari", "iso_1_code": null, "iso_3_code": "kxp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4115", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sourashtra", "iso_1_code": null, "iso_3_code": "saz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4116", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vasavi", "iso_1_code": null, "iso_3_code": "vas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vaghri", "iso_1_code": null, "iso_3_code": "vgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Gujr": { + "full_object": "IndicNLPTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "script": "Gujr", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4109", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khandesi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ahirani", "iso_1_code": null, "iso_3_code": "ahr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dangi", "iso_1_code": null, "iso_3_code": "dhn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khandesi", "iso_1_code": null, "iso_3_code": "khn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pahari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central Pahari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kumaoni", "iso_1_code": null, "iso_3_code": "kfy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4125", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Garhwali", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Garhwali", "iso_1_code": null, "iso_3_code": "gbm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Pahari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pahari, Mahasu", "iso_1_code": null, "iso_3_code": "bfz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bhadrawahi", "iso_1_code": null, "iso_3_code": "bhd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bhattiyali", "iso_1_code": null, "iso_3_code": "bht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chambeali", "iso_1_code": null, "iso_3_code": "cdh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Churahi", "iso_1_code": null, "iso_3_code": "cdj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4133", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogri", "iso_1_code": null, "iso_3_code": "dgo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gaddi", "iso_1_code": null, "iso_3_code": "gbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hinduri", "iso_1_code": null, "iso_3_code": "hii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4136", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khah", "iso_1_code": null, "iso_3_code": "hkh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jaunsari", "iso_1_code": null, "iso_3_code": "jns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bilaspuri", "iso_1_code": null, "iso_3_code": "kfs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4139", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pahari, Kullu", "iso_1_code": null, "iso_3_code": "kfx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinnauri, Pahari", "iso_1_code": null, "iso_3_code": "kjo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4141", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandeali", "iso_1_code": null, "iso_3_code": "mjl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pangwali", "iso_1_code": null, "iso_3_code": "pgg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4143", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sirmauri", "iso_1_code": null, "iso_3_code": "srx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4144", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kangri", "iso_1_code": null, "iso_3_code": "xnr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4145", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Panjabi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"pa\")", - "original_lang_name": "punjabi", - "original_lang_code": "pan", - "scripts": [ - "Latn", - "Guru" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, - "Guru": { - "full_object": "IndicNLPTokenizer(\"pa\")", - "original_lang_name": "punjabi", - "original_lang_code": "pan", - "scripts": [ - "Latn", - "Guru" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ur\")", - "original_lang_name": "urdu", - "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Punjabi, Eastern", "iso_1_code": "pa", "iso_3_code": "pan", + "children": [], "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"pa\")", - "original_lang_name": "punjabi", - "original_lang_code": "pan", - "scripts": [ - "Latn", - "Guru" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, "Guru": { "full_object": "IndicNLPTokenizer(\"pa\")", "original_lang_name": "punjabi", "original_lang_code": "pan", - "scripts": [ - "Latn", - "Guru" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "script": "Guru", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4147", + "native_tokenizers": [ + "Guru" + ], "scripts": [ "Latn", "Guru" - ], - "own_tokenizer": true + ] }, { "name": "Western Panjabi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"ur\")", - "original_lang_name": "urdu", - "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Hindko, Southern", "iso_1_code": null, "iso_3_code": "hnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4149", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hindko, Northern", "iso_1_code": null, "iso_3_code": "hno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4150", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inku", "iso_1_code": null, "iso_3_code": "jat", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pahari-Potwari", "iso_1_code": null, "iso_3_code": "phr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4152", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punjabi, Western", "iso_1_code": null, "iso_3_code": "pnb", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"ur\")", + "full_object": "IndicNLPTokenizer(\"ur\")", "original_lang_name": "urdu", "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4153", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Saraiki", "iso_1_code": null, "iso_3_code": "skr", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"ur\")", + "full_object": "IndicNLPTokenizer(\"ur\")", "original_lang_name": "urdu", "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4154", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Khetrani", "iso_1_code": null, "iso_3_code": "xhe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "4148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "script": "Guru", + "class_name": "IndicNLPTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rajasthani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gujari", "iso_1_code": null, "iso_3_code": "gju", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marwari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dhundari", "iso_1_code": null, "iso_3_code": "dhd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Godwari", "iso_1_code": null, "iso_3_code": "gdx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4160", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Goaria", "iso_1_code": null, "iso_3_code": "gig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4161", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jogi", "iso_1_code": null, "iso_3_code": "jog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loarki", "iso_1_code": null, "iso_3_code": "lrk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dhatki", "iso_1_code": null, "iso_3_code": "mki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4164", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mewari", "iso_1_code": null, "iso_3_code": "mtr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marwari", "iso_1_code": null, "iso_3_code": "mve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4166", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marwari", "iso_1_code": null, "iso_3_code": "rwr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4167", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shekhawati", "iso_1_code": null, "iso_3_code": "swv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Merwari", "iso_1_code": null, "iso_3_code": "wry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bagri", "iso_1_code": null, "iso_3_code": "bgq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lohar, Gade", "iso_1_code": null, "iso_3_code": "gda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gurgula", "iso_1_code": null, "iso_3_code": "ggg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4173", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haroti", "iso_1_code": null, "iso_3_code": "hoj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lambadi", "iso_1_code": null, "iso_3_code": "lmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4175", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malvi", "iso_1_code": null, "iso_3_code": "mup", - "tokenizers": { - "Deva": { - "full_object": "SpaCyTokenizer(\"ne\")", - "original_lang_name": "nepali", - "original_lang_code": "nep", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, - "children": [], - "node_i": "4176", - "scripts": [ - "Deva" - ], - "own_tokenizer": false - }, - { - "name": "Nimadi", - "iso_1_code": null, - "iso_3_code": "noe", - "tokenizers": {}, "children": [], - "node_i": "4177", - "scripts": [], - "own_tokenizer": false - } - ], - "node_i": "4170", - "scripts": [], - "own_tokenizer": false - } - ], - "node_i": "4156", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Romani", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Grek": { - "full_object": "SpaCyTokenizer(\"el\")", - "original_lang_name": "greek", - "original_lang_code": "ell", - "scripts": [ - "Grek" + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, + "node_i": "4176", + "native_tokenizers": [], + "scripts": [ + "Deva" + ] + }, + { + "name": "Nimadi", + "iso_1_code": null, + "iso_3_code": "noe", + "children": [], + "tokenizers": {}, + "node_i": "4177", + "native_tokenizers": [], + "scripts": [] + } ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, + "node_i": "4170", + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, + "node_i": "4156", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Romani", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Balkan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Grek": { - "full_object": "SpaCyTokenizer(\"el\")", - "original_lang_name": "greek", - "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Romani, Balkan", "iso_1_code": null, "iso_3_code": "rmn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { "full_object": "StanzaTokenizer(\"kmr\")", "original_lang_name": "northern_kurdish", "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" }, "Grek": { "full_object": "SpaCyTokenizer(\"el\")", "original_lang_name": "greek", "original_lang_code": "ell", - "scripts": [ - "Grek" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Grek", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4180", + "native_tokenizers": [], "scripts": [ "Latn", "Cyrl", "Grek" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "script": "Grek", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4179", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Romani, Carpathian", "iso_1_code": null, "iso_3_code": "rmc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4182", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Romani, Kalo Finnish", "iso_1_code": null, "iso_3_code": "rmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Romani, Baltic", "iso_1_code": null, "iso_3_code": "rml", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4184", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Romani, Sinte", "iso_1_code": null, "iso_3_code": "rmo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4185", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Romani, Welsh", "iso_1_code": null, "iso_3_code": "rmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4186", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4181", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Vlax", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Gujr": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { "full_object": "StanzaTokenizer(\"kmr\")", "original_lang_name": "northern_kurdish", "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "4181", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Vlax", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Romani, Vlax", "iso_1_code": null, "iso_3_code": "rmy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"gu\")", - "original_lang_name": "gujarati", - "original_lang_code": "guj", - "scripts": [ - "Latn", - "Gujr" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { "full_object": "StanzaTokenizer(\"kmr\")", "original_lang_name": "northern_kurdish", "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4188", + "native_tokenizers": [], "scripts": [ "Latn", "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "script": "Grek", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sonha", "iso_1_code": null, "iso_3_code": "soi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mewati", "iso_1_code": null, "iso_3_code": "wtm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4083", - "scripts": [], - "own_tokenizer": false - } - ], - "node_i": "4070", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Outer Languages", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Beng": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "tokenizers": { + "Gujr": { + "full_object": "IndicNLPTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "script": "Gujr", + "class_name": "IndicNLPTokenizer" + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "script": "Guru", + "class_name": "IndicNLPTokenizer" + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "script": "Grek", + "class_name": "SpaCyTokenizer" + } + }, + "node_i": "4083", + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Gujr": { + "full_object": "IndicNLPTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "script": "Gujr", + "class_name": "IndicNLPTokenizer" }, - "Orya": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "script": "Guru", + "class_name": "IndicNLPTokenizer" }, "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" }, "Arab": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" }, - "Sinh": { - "full_object": "SpaCyTokenizer(\"si\")", - "original_lang_name": "sinhala", - "original_lang_code": "sin", - "scripts": [ - "Sinh" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "script": "Grek", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4070", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Outer Languages", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Beng": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Orya": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bengali-Assamese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Beng": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Assamese", "iso_1_code": "as", "iso_3_code": "asm", + "children": [], "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"as\")", - "original_lang_name": "assamese", - "original_lang_code": "asm", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, "Beng": { "full_object": "IndicNLPTokenizer(\"as\")", "original_lang_name": "assamese", "original_lang_code": "asm", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "script": "Beng", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4195", + "native_tokenizers": [ + "Beng" + ], "scripts": [ "Beng", "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Bengali", "iso_1_code": "bn", "iso_3_code": "ben", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Beng": { - "full_object": "SpaCyTokenizer(\"bn\")", + "full_object": "IndicNLPTokenizer(\"bn\")", "original_lang_name": "bengali", "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Beng", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4196", + "native_tokenizers": [ + "Beng" + ], "scripts": [ "Beng", "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Bishnupuriya", "iso_1_code": null, "iso_3_code": "bpy", + "children": [], "tokenizers": { "Beng": { - "full_object": "SpaCyTokenizer(\"bn\")", + "full_object": "IndicNLPTokenizer(\"bn\")", "original_lang_name": "bengali", "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Beng", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4197", + "native_tokenizers": [], "scripts": [ "Beng" - ], - "own_tokenizer": false + ] }, { "name": "Chakma", "iso_1_code": null, "iso_3_code": "ccp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4198", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chittagonian", "iso_1_code": null, "iso_3_code": "ctg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hajong", "iso_1_code": null, "iso_3_code": "haj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Halbi", "iso_1_code": null, "iso_3_code": "hlb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurmukar", "iso_1_code": null, "iso_3_code": "kfv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4202", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kharia Thar", "iso_1_code": null, "iso_3_code": "ksy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4203", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kewat", "iso_1_code": null, "iso_3_code": "kyv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lodhi", "iso_1_code": null, "iso_3_code": "lbm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mal Paharia", "iso_1_code": null, "iso_3_code": "mkb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahari", "iso_1_code": null, "iso_3_code": "nhh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rohingya", "iso_1_code": null, "iso_3_code": "rhg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4208", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Rajbanshi", "iso_1_code": null, "iso_3_code": "rjs", + "children": [], "tokenizers": { "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "maithili", + "original_lang_code": "mai", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4209", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Rangpuri", "iso_1_code": null, "iso_3_code": "rkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sylheti", "iso_1_code": null, "iso_3_code": "syl", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Beng": { - "full_object": "SpaCyTokenizer(\"bn\")", + "full_object": "IndicNLPTokenizer(\"bn\")", "original_lang_name": "bengali", "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Beng", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4211", + "native_tokenizers": [], "scripts": [ "Latn", "Beng" - ], - "own_tokenizer": false + ] }, { "name": "Tangchangya", "iso_1_code": null, "iso_3_code": "tnv", - "tokenizers": {}, "children": [], - "node_i": "4212", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mirgan", - "iso_1_code": null, - "iso_3_code": "zrg", "tokenizers": {}, - "children": [], - "node_i": "4213", - "scripts": [], - "own_tokenizer": false - } - ], - "node_i": "4194", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Bihari", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "node_i": "4212", + "native_tokenizers": [], + "scripts": [] }, + { + "name": "Mirgan", + "iso_1_code": null, + "iso_3_code": "zrg", + "children": [], + "tokenizers": {}, + "node_i": "4213", + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { "Beng": { - "full_object": "SpaCyTokenizer(\"bn\")", + "full_object": "IndicNLPTokenizer(\"bn\")", "original_lang_name": "bengali", "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Beng", + "class_name": "IndicNLPTokenizer" }, "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "maithili", + "original_lang_code": "mai", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "4194", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Bihari", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bhojpuri", "iso_1_code": null, "iso_3_code": "bho", + "children": [], "tokenizers": { "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "maithili", + "original_lang_code": "mai", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4215", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Hindustani, Sarnami", "iso_1_code": null, "iso_3_code": "hns", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"bn\")", - "original_lang_name": "bengali", - "original_lang_code": "ben", - "scripts": [ - "Latn", - "Beng" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4216", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kudmali", "iso_1_code": null, "iso_3_code": "kyw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4217", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Magahi", "iso_1_code": null, "iso_3_code": "mag", + "children": [], "tokenizers": { "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "maithili", + "original_lang_code": "mai", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4218", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Maithili", "iso_1_code": null, "iso_3_code": "mai", + "children": [], "tokenizers": { "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "maithili", + "original_lang_code": "mai", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4219", - "scripts": [ + "native_tokenizers": [ "Deva" ], - "own_tokenizer": false + "scripts": [ + "Deva" + ] }, { "name": "Majhi", "iso_1_code": null, "iso_3_code": "mjz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4220", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sadri", "iso_1_code": null, "iso_3_code": "sck", + "children": [], "tokenizers": { "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "maithili", + "original_lang_code": "mai", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4221", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Sadri, Oraon", "iso_1_code": null, "iso_3_code": "sdr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Surjapuri", "iso_1_code": null, "iso_3_code": "sjp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Musasa", "iso_1_code": null, "iso_3_code": "smm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4224", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Panchpargania", "iso_1_code": null, "iso_3_code": "tdb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bajjika", "iso_1_code": null, "iso_3_code": "vjk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "maithili", + "original_lang_code": "mai", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oriya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - }, - "Orya": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bodo Parja", "iso_1_code": null, "iso_3_code": "bdv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bhatri", "iso_1_code": null, "iso_3_code": "bgw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4229", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bhunjia", "iso_1_code": null, "iso_3_code": "bhu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Desiya", "iso_1_code": null, "iso_3_code": "dso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kupia", "iso_1_code": null, "iso_3_code": "key", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4232", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oriya, Adivasi", "iso_1_code": null, "iso_3_code": "ort", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4233", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Odia", "iso_1_code": "or", "iso_3_code": "ory", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - }, - "Orya": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4234", + "native_tokenizers": [], "scripts": [ "Latn", "Orya" - ], - "own_tokenizer": true + ] }, { "name": "Reli", "iso_1_code": null, "iso_3_code": "rei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sambalpuri", "iso_1_code": "or", "iso_3_code": "spv", - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - }, - "Orya": { - "full_object": "IndicNLPTokenizer(\"or\")", - "original_lang_name": "oriya", - "original_lang_code": "ori", - "scripts": [ - "Latn", - "Orya" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "4236", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Angika", "iso_1_code": null, "iso_3_code": "anp", + "children": [], "tokenizers": { "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "maithili", + "original_lang_code": "mai", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4238", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Bote", "iso_1_code": null, "iso_3_code": "bmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buksa", "iso_1_code": null, "iso_3_code": "tkb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "maithili", + "original_lang_code": "mai", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "4237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4193", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Northwestern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "Beng": { + "full_object": "IndicNLPTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "script": "Beng", + "class_name": "IndicNLPTokenizer" }, "Deva": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "maithili", + "original_lang_code": "mai", + "script": "Deva", + "class_name": "IndicNLPTokenizer" }, - "Arab": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, - "children": [ - { - "name": "Dardic", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, + "node_i": "4193", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Northwestern", + "iso_1_code": null, + "iso_3_code": null, + "children": [ + { + "name": "Dardic", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Chitral", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khowar", "iso_1_code": null, "iso_3_code": "khw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalasha", "iso_1_code": null, "iso_3_code": "kls", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kashmiri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kashmiri", "iso_1_code": "ks", "iso_3_code": "kas", + "children": [], "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "kashmiri", + "original_lang_code": "kas", + "script": "Arab", + "class_name": "IndicNLPTokenizer" }, "Deva": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" }, - "Arab": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4247", + "native_tokenizers": [ + "Arab" + ], "scripts": [ - "Latn", "Arab", + "Latn", "Deva" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "kashmiri", + "original_lang_code": "kas", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kohistani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bateri", "iso_1_code": null, "iso_3_code": "btv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chilisso", "iso_1_code": null, "iso_3_code": "clh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gawri", "iso_1_code": null, "iso_3_code": "gwc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gowro", "iso_1_code": null, "iso_3_code": "gwf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4252", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kohistani, Indus", "iso_1_code": null, "iso_3_code": "mvy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mankiyali", "iso_1_code": null, "iso_3_code": "nlm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tirahi", "iso_1_code": null, "iso_3_code": "tra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4255", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Torwali", "iso_1_code": null, "iso_3_code": "trw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Degano", "iso_1_code": null, "iso_3_code": "wsv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kunar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dameli", "iso_1_code": null, "iso_3_code": "dml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4259", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gawar-Bati", "iso_1_code": null, "iso_3_code": "gwt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Grangali", "iso_1_code": null, "iso_3_code": "nli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shumashti", "iso_1_code": null, "iso_3_code": "sts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pashai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pashai, Northeast", "iso_1_code": null, "iso_3_code": "aee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4264", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pashai, Northwest", "iso_1_code": null, "iso_3_code": "glh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4265", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pashai, Southwest", "iso_1_code": null, "iso_3_code": "psh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pashai, Southeast", "iso_1_code": null, "iso_3_code": "psi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4263", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shina", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Brokskat", "iso_1_code": null, "iso_3_code": "bkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4269", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palula", "iso_1_code": null, "iso_3_code": "phl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shina, Kohistani", "iso_1_code": null, "iso_3_code": "plk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4271", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shina", "iso_1_code": null, "iso_3_code": "scl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Savi", "iso_1_code": null, "iso_3_code": "sdg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4273", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kundal Shahi", "iso_1_code": null, "iso_3_code": "shd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ushojo", "iso_1_code": null, "iso_3_code": "ush", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4275", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalkoti", "iso_1_code": null, "iso_3_code": "xka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4242", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Sindhi", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "kashmiri", + "original_lang_code": "kas", + "script": "Arab", + "class_name": "IndicNLPTokenizer" }, "Deva": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" }, - "Arab": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "4242", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Sindhi", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Jadgali", "iso_1_code": null, "iso_3_code": "jdg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kacchi", "iso_1_code": null, "iso_3_code": "kfr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4279", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lasi", "iso_1_code": null, "iso_3_code": "lss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luwati", "iso_1_code": null, "iso_3_code": "luv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sindhi Bhil", "iso_1_code": null, "iso_3_code": "sbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sindhi", "iso_1_code": "sd", "iso_3_code": "snd", + "children": [], "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"sd\")", + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", "original_lang_name": "sindhi", "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "script": "Arab", + "class_name": "IndicNLPTokenizer" }, "Deva": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" }, - "Arab": { - "full_object": "IndicNLPTokenizer(\"sd\")", - "original_lang_name": "sindhi", - "original_lang_code": "snd", - "scripts": [ - "Latn", - "Deva", - "Arab" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4283", + "native_tokenizers": [ + "Arab" + ], "scripts": [ "Latn", "Arab", "Deva" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4277", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4241", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" }, - "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "Deva": { + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" }, - "Sinh": { - "full_object": "SpaCyTokenizer(\"si\")", - "original_lang_name": "sinhala", - "original_lang_code": "sin", - "scripts": [ - "Sinh" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "4241", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Marathi", "iso_1_code": "mr", "iso_3_code": "mar", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"mr\")", - "original_lang_name": "marathi", - "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Deva": { - "full_object": "SpaCyTokenizer(\"mr\")", + "full_object": "IndicNLPTokenizer(\"mr\")", "original_lang_name": "marathi", "original_lang_code": "mar", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4285", + "native_tokenizers": [ + "Deva" + ], "scripts": [ "Deva", "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Konkani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"kok\")", - "original_lang_name": "konkani", - "original_lang_code": "kok", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - }, - "Deva": { - "full_object": "IndicNLPTokenizer(\"kok\")", - "original_lang_name": "konkani", - "original_lang_code": "kok", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Konkani, Goan", "iso_1_code": null, "iso_3_code": "gom", + "children": [], "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"kok\")", - "original_lang_name": "konkani", - "original_lang_code": "kok", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - }, "Deva": { - "full_object": "IndicNLPTokenizer(\"kok\")", - "original_lang_name": "konkani", - "original_lang_code": "kok", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4287", + "native_tokenizers": [], "scripts": [ "Latn", "Deva" - ], - "own_tokenizer": true + ] }, { "name": "Kukna", "iso_1_code": null, "iso_3_code": "kex", + "children": [], "tokenizers": { "Deva": { - "full_object": "IndicNLPTokenizer(\"kok\")", - "original_lang_name": "konkani", - "original_lang_code": "kok", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4288", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Katkari", "iso_1_code": null, "iso_3_code": "kfu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konkani", "iso_1_code": null, "iso_3_code": "knn", - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"kok\")", - "original_lang_name": "konkani", - "original_lang_code": "kok", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - }, - "Deva": { - "full_object": "IndicNLPTokenizer(\"kok\")", - "original_lang_name": "konkani", - "original_lang_code": "kok", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "4290", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Phudagi", "iso_1_code": null, "iso_3_code": "phd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4291", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samvedi", "iso_1_code": null, "iso_3_code": "smv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Varli", "iso_1_code": null, "iso_3_code": "vav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sinhalese-Maldivian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Sinh": { - "full_object": "SpaCyTokenizer(\"si\")", - "original_lang_name": "sinhala", - "original_lang_code": "sin", - "scripts": [ - "Sinh" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Maldivian", "iso_1_code": "dv", "iso_3_code": "div", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4295", + "native_tokenizers": [], "scripts": [ "Thaa" - ], - "own_tokenizer": false + ] }, { "name": "Sinhala", "iso_1_code": "si", "iso_3_code": "sin", + "children": [], "tokenizers": { "Sinh": { - "full_object": "SpaCyTokenizer(\"si\")", + "full_object": "IndicNLPTokenizer(\"si\")", "original_lang_name": "sinhala", "original_lang_code": "sin", - "scripts": [ - "Sinh" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Sinh", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4296", - "scripts": [ + "native_tokenizers": [ "Sinh" ], - "own_tokenizer": true + "scripts": [ + "Sinh" + ] }, { "name": "Veddah", "iso_1_code": null, "iso_3_code": "ved", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4297", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Sinh": { + "full_object": "IndicNLPTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "script": "Sinh", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "4294", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bhalay", "iso_1_code": null, "iso_3_code": "bhx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4299", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Deccan", "iso_1_code": null, "iso_3_code": "dcc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gowlan", "iso_1_code": null, "iso_3_code": "goj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4301", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Varhadi-Nagpuri", "iso_1_code": null, "iso_3_code": "vah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4298", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Sinh": { + "full_object": "IndicNLPTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "script": "Sinh", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4284", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Beng": { + "full_object": "IndicNLPTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "script": "Beng", + "class_name": "IndicNLPTokenizer" + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"mr\")", + "original_lang_name": "marathi", + "original_lang_code": "mar", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "sindhi", + "original_lang_code": "snd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Sinh": { + "full_object": "IndicNLPTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "script": "Sinh", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tharu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tharu, Rana", "iso_1_code": null, "iso_3_code": "thr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tharu, Kathariya", "iso_1_code": null, "iso_3_code": "tkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Tharu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tharu, Central", "iso_1_code": null, "iso_3_code": "the", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4307", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tharu, Dangaura", "iso_1_code": null, "iso_3_code": "thl", + "children": [], "tokenizers": { "Deva": { - "full_object": "SpaCyTokenizer(\"hi\")", + "full_object": "IndicNLPTokenizer(\"hi\")", "original_lang_name": "hindi", "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4308", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Tharu, Mid-Eastern", "iso_1_code": null, "iso_3_code": "thq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "4306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "4303", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Andh", "iso_1_code": null, "iso_3_code": "anr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4311", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bazigar", "iso_1_code": null, "iso_3_code": "bfr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinali", "iso_1_code": null, "iso_3_code": "cih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Danuwar", "iso_1_code": null, "iso_3_code": "dhw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4314", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Darai", "iso_1_code": null, "iso_3_code": "dry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4315", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dewas Rai", "iso_1_code": null, "iso_3_code": "dwz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanjari", "iso_1_code": null, "iso_3_code": "kft", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4317", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kumal", "iso_1_code": null, "iso_3_code": "kra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lohar, Lahul", "iso_1_code": null, "iso_3_code": "lhl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Memoni", "iso_1_code": null, "iso_3_code": "mby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oadki", "iso_1_code": null, "iso_3_code": "odk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pali", "iso_1_code": "pi", "iso_3_code": "pli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vaagri Booli", "iso_1_code": null, "iso_3_code": "vaa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4310", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Hindi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ur\")", - "original_lang_name": "urdu", - "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bundeli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bundeli", "iso_1_code": null, "iso_3_code": "bns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hindustani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Deva": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ur\")", - "original_lang_name": "urdu", - "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Hindi", "iso_1_code": "hi", "iso_3_code": "hin", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"hi\")", - "original_lang_name": "hindi", - "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Deva": { - "full_object": "SpaCyTokenizer(\"hi\")", + "full_object": "IndicNLPTokenizer(\"hi\")", "original_lang_name": "hindi", "original_lang_code": "hin", - "scripts": [ - "Latn", - "Deva" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4328", + "native_tokenizers": [ + "Deva" + ], "scripts": [ "Latn", "Deva" - ], - "own_tokenizer": true + ] }, { "name": "Urdu", "iso_1_code": "ur", "iso_3_code": "urd", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ur\")", - "original_lang_name": "urdu", - "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Arab": { - "full_object": "SpaCyTokenizer(\"ur\")", + "full_object": "IndicNLPTokenizer(\"ur\")", "original_lang_name": "urdu", "original_lang_code": "urd", - "scripts": [ - "Latn", - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4329", + "native_tokenizers": [ + "Arab" + ], "scripts": [ "Latn", "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Sansi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kabutra", "iso_1_code": null, "iso_3_code": "kbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sansi", "iso_1_code": null, "iso_3_code": "ssi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4330", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Haryanvi", "iso_1_code": null, "iso_3_code": "bgc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bhaya", "iso_1_code": null, "iso_3_code": "bhe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanauji", "iso_1_code": null, "iso_3_code": "bjj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4336", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Braj Bhasha", "iso_1_code": null, "iso_3_code": "bra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ghera", "iso_1_code": null, "iso_3_code": "ghr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gowli", "iso_1_code": null, "iso_3_code": "gok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4068", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Iranian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Gujr": { + "full_object": "IndicNLPTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "script": "Gujr", + "class_name": "IndicNLPTokenizer" + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "script": "Guru", + "class_name": "IndicNLPTokenizer" + }, + "Beng": { + "full_object": "IndicNLPTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "script": "Beng", + "class_name": "IndicNLPTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Sinh": { + "full_object": "IndicNLPTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "script": "Sinh", + "class_name": "IndicNLPTokenizer" + }, "Latn": { "full_object": "StanzaTokenizer(\"kmr\")", "original_lang_name": "northern_kurdish", "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" }, "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" }, - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "script": "Grek", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4068", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Iranian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Avestan", "iso_1_code": "ae", "iso_3_code": "ave", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Northeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ossetic", "iso_1_code": "os", "iso_3_code": "oss", + "children": [], "tokenizers": { "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4344", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Yagnobi", "iso_1_code": null, "iso_3_code": "yai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4345", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yassic", "iso_1_code": null, "iso_3_code": "ysc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Pamir", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ishkashimi", "iso_1_code": null, "iso_3_code": "isk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munji", "iso_1_code": null, "iso_3_code": "mnj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanglechi", "iso_1_code": null, "iso_3_code": "sgy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wakhi", "iso_1_code": null, "iso_3_code": "wbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yadgha", "iso_1_code": null, "iso_3_code": "ydg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shugni-Yazgulami", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Shughni", "iso_1_code": null, "iso_3_code": "sgh", + "children": [], "tokenizers": { "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4355", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Sarikoli", "iso_1_code": null, "iso_3_code": "srh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yazghulami", "iso_1_code": null, "iso_3_code": "yah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pashto", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Pashto, Southern", "iso_1_code": "ps", "iso_3_code": "pbt", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4359", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Pashto, Northern", "iso_1_code": "ps", "iso_3_code": "pbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pashto, Central", "iso_1_code": "ps", "iso_3_code": "pst", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waneci", "iso_1_code": null, "iso_3_code": "wne", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "4358", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Balochi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Balochi, Southern", "iso_1_code": null, "iso_3_code": "bcc", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4366", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Balochi, Western", "iso_1_code": null, "iso_3_code": "bgn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4367", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Balochi, Eastern", "iso_1_code": null, "iso_3_code": "bgp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bashkardi", "iso_1_code": null, "iso_3_code": "bsg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4369", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koroshi", "iso_1_code": null, "iso_3_code": "ktl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4370", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "4365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Caspian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gilaki", "iso_1_code": null, "iso_3_code": "glk", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4372", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Mazandarani", "iso_1_code": null, "iso_3_code": "mzn", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4373", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Shahmirzadi", "iso_1_code": null, "iso_3_code": "srz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4374", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "4371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Iran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ashtiani", "iso_1_code": null, "iso_3_code": "atn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4376", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dari, Zoroastrian", "iso_1_code": null, "iso_3_code": "gbz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4377", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gazi", "iso_1_code": null, "iso_3_code": "gzi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4378", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khunsari", "iso_1_code": null, "iso_3_code": "kfm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4379", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Natanzi", "iso_1_code": null, "iso_3_code": "ntz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nayini", "iso_1_code": null, "iso_3_code": "nyq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Parsi-Dari", "iso_1_code": null, "iso_3_code": "prd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sivandi", "iso_1_code": null, "iso_3_code": "siy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4383", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soi", "iso_1_code": null, "iso_3_code": "soj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4384", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vafsi", "iso_1_code": null, "iso_3_code": "vaf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4385", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4375", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurdish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kurdish, Central", "iso_1_code": "ku", "iso_3_code": "ckb", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4387", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Kurdish, Northern", "iso_1_code": "ku", "iso_3_code": "kmr", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"kmr\")", "original_lang_name": "northern_kurdish", "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" }, "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4388", + "native_tokenizers": [ + "Latn" + ], "scripts": [ "Latn", "Cyrl" - ], - "own_tokenizer": true + ] }, { "name": "Laki", "iso_1_code": null, "iso_3_code": "lki", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4389", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Kurdish, Southern", "iso_1_code": "ku", "iso_3_code": "sdh", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4390", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ormuri-Parachi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ormuri", "iso_1_code": null, "iso_3_code": "oru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Parachi", "iso_1_code": null, "iso_3_code": "prc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4393", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semnani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lasgerdi", "iso_1_code": null, "iso_3_code": "lsa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4395", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sangisari", "iso_1_code": null, "iso_3_code": "sgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4396", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semnani", "iso_1_code": null, "iso_3_code": "smy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4397", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sorkhei", "iso_1_code": null, "iso_3_code": "sqo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4394", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Talysh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Alviri-Vidari", "iso_1_code": null, "iso_3_code": "avd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4400", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eshtehardi", "iso_1_code": null, "iso_3_code": "esh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4401", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gozarkhani", "iso_1_code": null, "iso_3_code": "goz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Harzani", "iso_1_code": null, "iso_3_code": "hrz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karingani", "iso_1_code": null, "iso_3_code": "kgn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4404", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koresh-e Rostam", "iso_1_code": null, "iso_3_code": "okh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Razajerdi", "iso_1_code": null, "iso_3_code": "rat", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rudbari", "iso_1_code": null, "iso_3_code": "rdb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shahrudi", "iso_1_code": null, "iso_3_code": "shm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4408", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Takestani", "iso_1_code": null, "iso_3_code": "tks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4409", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Talysh", "iso_1_code": null, "iso_3_code": "tly", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"kmr\")", "original_lang_name": "northern_kurdish", "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4410", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Taromi, Upper", "iso_1_code": null, "iso_3_code": "tov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4411", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maraghei", "iso_1_code": null, "iso_3_code": "vmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4412", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kho\u2019ini", "iso_1_code": null, "iso_3_code": "xkc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kajali", "iso_1_code": null, "iso_3_code": "xkj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4414", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kabatei", "iso_1_code": null, "iso_3_code": "xkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4415", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "4399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dezfuli", "iso_1_code": null, "iso_3_code": "def", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4416", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zaza-Gorani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bajelani", "iso_1_code": null, "iso_3_code": "bjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zazaki, Southern", "iso_1_code": null, "iso_3_code": "diq", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"kmr\")", "original_lang_name": "northern_kurdish", "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4420", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gurani", "iso_1_code": null, "iso_3_code": "hac", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4421", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Zazaki, Northern", "iso_1_code": null, "iso_3_code": "kiu", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"kmr\")", "original_lang_name": "northern_kurdish", "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4422", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Shabak", "iso_1_code": null, "iso_3_code": "sdb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sarli", "iso_1_code": null, "iso_3_code": "sdf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "4418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4364", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southwestern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Latn": { "full_object": "StanzaTokenizer(\"kmr\")", "original_lang_name": "northern_kurdish", "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" }, "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4364", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southwestern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Fars", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fars, Southwestern", "iso_1_code": null, "iso_3_code": "fay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lari", "iso_1_code": null, "iso_3_code": "lrl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bakhti\u00e2ri", "iso_1_code": null, "iso_3_code": "bqi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4430", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luri, Northern", "iso_1_code": null, "iso_3_code": "lrc", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "4431", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Luri, Southern", "iso_1_code": null, "iso_3_code": "luz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kumzari", "iso_1_code": null, "iso_3_code": "zum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "4429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Persian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Aimaq", "iso_1_code": null, "iso_3_code": "aiq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bukharic", "iso_1_code": null, "iso_3_code": "bhh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dehwari", "iso_1_code": null, "iso_3_code": "deh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hazaragi", "iso_1_code": null, "iso_3_code": "haz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4438", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dzhidi", "iso_1_code": null, "iso_3_code": "jpr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4439", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Persian, Iranian", "iso_1_code": "fa", "iso_3_code": "pes", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "4440", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Pahlavani", "iso_1_code": null, "iso_3_code": "phv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dari", "iso_1_code": "fa", "iso_3_code": "prs", - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"fa\")", - "original_lang_name": "persian", - "original_lang_code": "fas", - "scripts": [ - "Arab" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "4442", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Tajik", "iso_1_code": "tg", "iso_3_code": "tgk", + "children": [], "tokenizers": { "Cyrl": { - "full_object": "StanzaTokenizer(\"kmr\")", - "original_lang_name": "northern_kurdish", - "original_lang_code": "kmr", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4443", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Judeo-Tat", "iso_1_code": null, "iso_3_code": "jdt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4445", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tat, Muslim", "iso_1_code": null, "iso_3_code": "ttt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4446", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4363", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuristani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ashkun", "iso_1_code": null, "iso_3_code": "ask", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4448", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kateviri", "iso_1_code": null, "iso_3_code": "bsh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Prasuni", "iso_1_code": null, "iso_3_code": "prn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4450", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tregami", "iso_1_code": null, "iso_3_code": "trm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waigali", "iso_1_code": null, "iso_3_code": "wbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Komviri", "iso_1_code": null, "iso_3_code": "xvi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4453", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Badeshi", "iso_1_code": null, "iso_3_code": "bdz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Gujr": { + "full_object": "IndicNLPTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "script": "Gujr", + "class_name": "IndicNLPTokenizer" + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "script": "Guru", + "class_name": "IndicNLPTokenizer" + }, + "Beng": { + "full_object": "IndicNLPTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "script": "Beng", + "class_name": "IndicNLPTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Sinh": { + "full_object": "IndicNLPTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "script": "Sinh", + "class_name": "IndicNLPTokenizer" + }, + "Latn": { + "full_object": "StanzaTokenizer(\"kmr\")", + "original_lang_name": "northern_kurdish", + "original_lang_code": "kmr", + "script": "Latn", + "class_name": "StanzaTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "script": "Grek", + "class_name": "SpaCyTokenizer" } - ], + }, "node_i": "4067", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Italic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"es\")", - "original_lang_name": "spanish", - "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"ro\")", - "original_lang_name": "romanian", - "original_lang_code": "ron", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Latino-Faliscan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"la\")", - "original_lang_name": "latin", - "original_lang_code": "lat", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Latin", "iso_1_code": "la", "iso_3_code": "lat", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"la\")", "original_lang_name": "latin", "original_lang_code": "lat", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4458", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"la\")", + "original_lang_name": "latin", + "original_lang_code": "lat", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4457", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Romance", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"es\")", - "original_lang_name": "spanish", - "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"ro\")", - "original_lang_name": "romanian", - "original_lang_code": "ron", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ro\")", - "original_lang_name": "romanian", - "original_lang_code": "ron", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"ro\")", - "original_lang_name": "romanian", - "original_lang_code": "ron", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Romanian", "iso_1_code": "ro", "iso_3_code": "ron", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ro\")", "original_lang_name": "romanian", "original_lang_code": "ron", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" }, "Cyrl": { - "full_object": "SpaCyTokenizer(\"ro\")", - "original_lang_name": "romanian", - "original_lang_code": "ron", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4461", + "native_tokenizers": [ + "Latn" + ], "scripts": [ "Latn", "Cyrl" - ], - "own_tokenizer": true + ] }, { "name": "Romanian, Istro", "iso_1_code": null, "iso_3_code": "ruo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aromanian", "iso_1_code": null, "iso_3_code": "rup", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ro\")", "original_lang_name": "romanian", "original_lang_code": "ron", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4463", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Romanian, Megleno", "iso_1_code": null, "iso_3_code": "ruq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ro\")", + "original_lang_name": "romanian", + "original_lang_code": "ron", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Italo-Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"es\")", - "original_lang_name": "spanish", - "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Italo-Dalmatian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"it\")", - "original_lang_name": "italian", - "original_lang_code": "ita", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dalmatian", "iso_1_code": null, "iso_3_code": "dlm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4467", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Istriot", "iso_1_code": null, "iso_3_code": "ist", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Italian", "iso_1_code": "it", "iso_3_code": "ita", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"it\")", "original_lang_name": "italian", "original_lang_code": "ita", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4469", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Judeo-Italian", "iso_1_code": null, "iso_3_code": "itk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Napoletano", "iso_1_code": null, "iso_3_code": "nap", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"it\")", "original_lang_name": "italian", "original_lang_code": "ita", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4471", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sicilian", "iso_1_code": null, "iso_3_code": "scn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"it\")", "original_lang_name": "italian", "original_lang_code": "ita", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4472", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"it\")", + "original_lang_name": "italian", + "original_lang_code": "ita", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"es\")", - "original_lang_name": "spanish", - "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gallo-Iberian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"es\")", - "original_lang_name": "spanish", - "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gallo-Romance", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"fr\")", - "original_lang_name": "french", - "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gallo-Italian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lij\")", - "original_lang_name": "ligurian", - "original_lang_code": "lij", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Emilian", "iso_1_code": null, "iso_3_code": "egl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4477", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ligurian", "iso_1_code": null, "iso_3_code": "lij", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lij\")", "original_lang_name": "ligurian", "original_lang_code": "lij", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4478", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Lombard", "iso_1_code": null, "iso_3_code": "lmo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lij\")", "original_lang_name": "ligurian", "original_lang_code": "lij", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4479", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Piedmontese", "iso_1_code": null, "iso_3_code": "pms", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lij\")", "original_lang_name": "ligurian", "original_lang_code": "lij", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4480", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Romagnol", "iso_1_code": null, "iso_3_code": "rgn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Venetian", "iso_1_code": null, "iso_3_code": "vec", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lij\")", "original_lang_name": "ligurian", "original_lang_code": "lij", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4482", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lij\")", + "original_lang_name": "ligurian", + "original_lang_code": "lij", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gallo-Rhaetian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"fr\")", - "original_lang_name": "french", - "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "O\u00efl", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"fr\")", - "original_lang_name": "french", - "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "French", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"fr\")", - "original_lang_name": "french", - "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "French", "iso_1_code": "fr", "iso_3_code": "fra", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fr\")", "original_lang_name": "french", "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4486", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "French, Cajun", "iso_1_code": null, "iso_3_code": "frc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4487", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guern\u00e9siais", "iso_1_code": null, "iso_3_code": "nrf", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fr\")", "original_lang_name": "french", "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4488", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Picard", "iso_1_code": null, "iso_3_code": "pcd", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fr\")", "original_lang_name": "french", "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4489", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Walloon", "iso_1_code": "wa", "iso_3_code": "wln", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fr\")", "original_lang_name": "french", "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4490", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "4485", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southeastern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fr\")", "original_lang_name": "french", "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4485", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southeastern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Arpitan", "iso_1_code": null, "iso_3_code": "frp", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fr\")", "original_lang_name": "french", "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4492", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4484", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Rhaetian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fr\")", "original_lang_name": "french", "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4484", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Rhaetian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Friulian", "iso_1_code": null, "iso_3_code": "fur", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fr\")", "original_lang_name": "french", "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4494", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ladin", "iso_1_code": null, "iso_3_code": "lld", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fr\")", "original_lang_name": "french", "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4495", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Romansh", "iso_1_code": "rm", "iso_3_code": "roh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fr\")", "original_lang_name": "french", "original_lang_code": "fra", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4496", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4483", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fr\")", + "original_lang_name": "french", + "original_lang_code": "fra", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ibero-Romance", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"es\")", - "original_lang_name": "spanish", - "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "East Iberian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ca\")", - "original_lang_name": "catalan", - "original_lang_code": "cat", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Catalan", "iso_1_code": "ca", "iso_3_code": "cat", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ca\")", "original_lang_name": "catalan", "original_lang_code": "cat", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4499", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"ca\")", + "original_lang_name": "catalan", + "original_lang_code": "cat", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4498", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oc", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"es\")", - "original_lang_name": "spanish", - "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Occitan", "iso_1_code": "oc", "iso_3_code": "oci", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4501", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Shuadit", "iso_1_code": null, "iso_3_code": "sdt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4500", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "West Iberian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4500", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "West Iberian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Asturo-Leonese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"es\")", - "original_lang_name": "spanish", - "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Asturian", "iso_1_code": null, "iso_3_code": "ast", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4505", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mirandese", "iso_1_code": null, "iso_3_code": "mwl", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4506", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "4504", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Castilian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4504", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Castilian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Extremaduran", "iso_1_code": null, "iso_3_code": "ext", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4508", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ladino", "iso_1_code": null, "iso_3_code": "lad", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4509", + "native_tokenizers": [], "scripts": [ "Latn", "Hebr" - ], - "own_tokenizer": false + ] }, { "name": "Spanish", "iso_1_code": "es", "iso_3_code": "spa", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4510", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Spanish, Charapa", "iso_1_code": null, "iso_3_code": "spq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Portuguese-Galician", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"pt\")", - "original_lang_name": "portuguese", - "original_lang_code": "por", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Minderico", "iso_1_code": null, "iso_3_code": "drc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fala", "iso_1_code": null, "iso_3_code": "fax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Galician", "iso_1_code": "gl", "iso_3_code": "glg", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"gl\")", "original_lang_name": "galician", "original_lang_code": "glg", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "4515", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Portuguese", "iso_1_code": "pt", "iso_3_code": "por", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"pt\")", "original_lang_name": "portuguese", "original_lang_code": "por", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4516", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"pt\")", + "original_lang_name": "portuguese", + "original_lang_code": "por", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4497", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4474", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Pyrenean-Mozarabic", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4474", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Pyrenean-Mozarabic", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Pyrenean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"es\")", - "original_lang_name": "spanish", - "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Aragonese", "iso_1_code": "an", "iso_3_code": "arg", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4519", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4518", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "4465", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4465", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Corsican", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"es\")", - "original_lang_name": "spanish", - "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Corsican", "iso_1_code": "co", "iso_3_code": "cos", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4522", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "4521", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Sardinian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "4521", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Sardinian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Sardinian, Sassarese", "iso_1_code": "sc", "iso_3_code": "sdc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"es\")", "original_lang_name": "spanish", "original_lang_code": "spa", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4524", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sardinian, Gallurese", "iso_1_code": "sc", "iso_3_code": "sdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sardinian, Logudorese", "iso_1_code": "sc", "iso_3_code": "src", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4526", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sardinian, Campidanese", "iso_1_code": "sc", "iso_3_code": "sro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4523", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"es\")", + "original_lang_name": "spanish", + "original_lang_code": "spa", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Armn": { + "full_object": "SpaCyTokenizer(\"hy\")", + "original_lang_name": "armenian", + "original_lang_code": "hye", + "script": "Armn", + "class_name": "SpaCyTokenizer" + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"en\")", + "original_lang_name": "english", + "original_lang_code": "eng", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Cyrl": { + "full_object": "SpaCyTokenizer(\"ru\")", + "original_lang_name": "russian", + "original_lang_code": "rus", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Grek": { + "full_object": "SpaCyTokenizer(\"el\")", + "original_lang_name": "greek", + "original_lang_code": "ell", + "script": "Grek", + "class_name": "SpaCyTokenizer" + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "hindi", + "original_lang_code": "hin", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + }, + "Gujr": { + "full_object": "IndicNLPTokenizer(\"gu\")", + "original_lang_name": "gujarati", + "original_lang_code": "guj", + "script": "Gujr", + "class_name": "IndicNLPTokenizer" + }, + "Guru": { + "full_object": "IndicNLPTokenizer(\"pa\")", + "original_lang_name": "punjabi", + "original_lang_code": "pan", + "script": "Guru", + "class_name": "IndicNLPTokenizer" + }, + "Beng": { + "full_object": "IndicNLPTokenizer(\"bn\")", + "original_lang_name": "bengali", + "original_lang_code": "ben", + "script": "Beng", + "class_name": "IndicNLPTokenizer" + }, + "Arab": { + "full_object": "IndicNLPTokenizer(\"ur\")", + "original_lang_name": "urdu", + "original_lang_code": "urd", + "script": "Arab", + "class_name": "IndicNLPTokenizer" + }, + "Sinh": { + "full_object": "IndicNLPTokenizer(\"si\")", + "original_lang_name": "sinhala", + "original_lang_code": "sin", + "script": "Sinh", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "3919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Iroquoian.json b/data/Iroquoian.json index 4aaa42ca592ca8717360c0fee774b07a30aa74bb..ff0d57debcabbe27936f8dd06f3f4ebe7fb4a15f 100644 --- a/data/Iroquoian.json +++ b/data/Iroquoian.json @@ -2,222 +2,222 @@ "name": "Iroquoian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cherokee", "iso_1_code": null, "iso_3_code": "chr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4529", + "native_tokenizers": [], "scripts": [ "Cher", "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Northern Iroquoian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Five Nations-Huronian-Susquehannock", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Susquehannock", "iso_1_code": null, "iso_3_code": "sqn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Five Nations-Susquehannock", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cayuga", "iso_1_code": null, "iso_3_code": "cay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4534", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Onondaga", "iso_1_code": null, "iso_3_code": "ono", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4535", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seneca", "iso_1_code": null, "iso_3_code": "see", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4536", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mohawk-Oneida", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mohawk", "iso_1_code": null, "iso_3_code": "moh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4538", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Oneida", "iso_1_code": null, "iso_3_code": "one", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4537", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Huronian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Laurentian", "iso_1_code": null, "iso_3_code": "lre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Huron-Petun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wendat", "iso_1_code": null, "iso_3_code": "wdt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wyandot", "iso_1_code": null, "iso_3_code": "wyn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4544", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4542", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuscarora-Nottoway", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nottoway", "iso_1_code": null, "iso_3_code": "ntw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4546", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nottoway-Meherrin", "iso_1_code": null, "iso_3_code": "nwy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuscarora", "iso_1_code": null, "iso_3_code": "tus", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4548", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Jabutian.json b/data/Jabutian.json index 9d0ab191bfb1493cd3501fb51f13419b711bf51c..2ff85d3375147cf4befc43bcbca5e60b8363a302 100644 --- a/data/Jabutian.json +++ b/data/Jabutian.json @@ -2,30 +2,30 @@ "name": "Jabutian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arikap\u00fa", "iso_1_code": null, "iso_3_code": "ark", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4550", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jabut\u00ed", "iso_1_code": null, "iso_3_code": "jbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4551", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Japonic.json b/data/Japonic.json index d22f41be49a912c14deaa6691c06d6ccaba3f222..af1bbd1f279a635432f28b89a89bdc0bb82d47db 100644 --- a/data/Japonic.json +++ b/data/Japonic.json @@ -2,209 +2,205 @@ "name": "Japonic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Jpan": { - "full_object": "SpaCyTokenizer(\"ja\"), ", - "original_lang_name": "japanese", - "original_lang_code": "jpn", - "scripts": [ - "Jpan" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Japanese", "iso_1_code": "ja", "iso_3_code": "jpn", + "children": [], "tokenizers": { "Jpan": { - "full_object": "SpaCyTokenizer(\"ja\"), ", + "full_object": "SpaCyTokenizer(\"ja\"),", "original_lang_name": "japanese", "original_lang_code": "jpn", - "scripts": [ - "Jpan" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Jpan", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4553", - "scripts": [ + "native_tokenizers": [ "Jpan" ], - "own_tokenizer": true + "scripts": [ + "Jpan" + ] }, { "name": "Ryukyuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amami-Okinawan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern Amami-Okinawan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amami-Oshima, Southern", "iso_1_code": null, "iso_3_code": "ams", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikai", "iso_1_code": null, "iso_3_code": "kzg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4558", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amami-Oshima, Northern", "iso_1_code": null, "iso_3_code": "ryn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4559", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toku-No-Shima", "iso_1_code": null, "iso_3_code": "tkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4560", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4556", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Amami-Okinawan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oki-No-Erabu", "iso_1_code": null, "iso_3_code": "okn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Okinawan, Central", "iso_1_code": null, "iso_3_code": "ryu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kunigami", "iso_1_code": null, "iso_3_code": "xug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4564", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yoron", "iso_1_code": null, "iso_3_code": "yox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4565", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sakishima", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Miyako", "iso_1_code": null, "iso_3_code": "mvi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4567", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaeyama", "iso_1_code": null, "iso_3_code": "rys", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4568", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yonaguni", "iso_1_code": null, "iso_3_code": "yoi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4569", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4566", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Jpan": { + "full_object": "SpaCyTokenizer(\"ja\"),", + "original_lang_name": "japanese", + "original_lang_code": "jpn", + "script": "Jpan", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Jean.json b/data/Jean.json index cbb00769112c1280f9bdf9c1dce6a974b345986e..31d9f933c983ab901ef5e25642b5e92a8638bdc1 100644 --- a/data/Jean.json +++ b/data/Jean.json @@ -2,233 +2,233 @@ "name": "Jean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Acro\u00e1", "iso_1_code": null, "iso_3_code": "acs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4572", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xav\u00e1nte", "iso_1_code": null, "iso_3_code": "xav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4573", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Xer\u00e9nte", "iso_1_code": null, "iso_3_code": "xer", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4574", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xakriab\u00e1", "iso_1_code": null, "iso_3_code": "xkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4571", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Apinag\u00e9", "iso_1_code": null, "iso_3_code": "apn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4577", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Panar\u00e1", "iso_1_code": null, "iso_3_code": "kre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4578", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suy\u00e1", "iso_1_code": null, "iso_3_code": "suy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayap\u00f3", "iso_1_code": null, "iso_3_code": "txu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4580", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Timbira", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gavi\u00e3o, Par\u00e1", "iso_1_code": null, "iso_3_code": "gvp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4582", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Canela", "iso_1_code": null, "iso_3_code": "ram", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4583", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Krah\u00f4", "iso_1_code": null, "iso_3_code": "xra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4584", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kreye", "iso_1_code": null, "iso_3_code": "xre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4585", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Krikati-Timbira", "iso_1_code": null, "iso_3_code": "xri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4586", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4576", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Xokleng", "iso_1_code": null, "iso_3_code": "xok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaingang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaingang", "iso_1_code": null, "iso_3_code": "kgp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4590", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kaing\u00e1ng, S\u00e3o Paulo", "iso_1_code": null, "iso_3_code": "zkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4589", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4587", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4570", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Jicaquean.json b/data/Jicaquean.json index 9a1889dae4093f92e80b4234c8e2a5c58402616b..c64407cfa77b9179f92bdf09e538aa71d1ec3a5a 100644 --- a/data/Jicaquean.json +++ b/data/Jicaquean.json @@ -2,22 +2,22 @@ "name": "Jicaquean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tol", "iso_1_code": null, "iso_3_code": "jic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4593", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Jivaroan.json b/data/Jivaroan.json index f8f06e737b83518a1c60d60cd823df662d947b0b..21f4aeded7b4bc8ba2e771d18b32c5392b4aab9f 100644 --- a/data/Jivaroan.json +++ b/data/Jivaroan.json @@ -2,69 +2,69 @@ "name": "Jivaroan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awaj\u00fan", "iso_1_code": null, "iso_3_code": "agr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4595", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "J\u00edvaro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Achuar-Shiwiar", "iso_1_code": null, "iso_3_code": "acu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4597", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wamp\u00eds", "iso_1_code": null, "iso_3_code": "hub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4598", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Shuar", "iso_1_code": null, "iso_3_code": "jiv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4599", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4596", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Kamakanan.json b/data/Kamakanan.json index 40b1db8617ecc9e6bf4317ee6481801734054a95..7f8efc5e6f0e9f4c0d167ef3b895b56a44a2b34c 100644 --- a/data/Kamakanan.json +++ b/data/Kamakanan.json @@ -2,31 +2,31 @@ "name": "Kamakanan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamak\u00e1n", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamakan", "iso_1_code": null, "iso_3_code": "vkm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4602", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4601", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git "a/data/Karaj\303\241.json" "b/data/Karaj\303\241.json" index 61eed10c6502231af89f15c5cb7838610369d796..fbd785882c1dc480aabc97e9af39a805c1dcf864 100644 --- "a/data/Karaj\303\241.json" +++ "b/data/Karaj\303\241.json" @@ -2,22 +2,22 @@ "name": "Karaj\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karaj\u00e1", "iso_1_code": null, "iso_3_code": "kpj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4604", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Kartvelian.json b/data/Kartvelian.json index 0203fa0bfbb6ba40fa3f19d675876ffdcadb3f00..cd69324b6e30df438c83ac476d3a89222fc62b19 100644 --- a/data/Kartvelian.json +++ b/data/Kartvelian.json @@ -2,97 +2,97 @@ "name": "Kartvelian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Georgian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Judeo-Georgian", "iso_1_code": null, "iso_3_code": "jge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Georgian", "iso_1_code": "ka", "iso_3_code": "kat", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4608", + "native_tokenizers": [], "scripts": [ "Geor" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Svan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Svan", "iso_1_code": null, "iso_3_code": "sva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4610", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Laz", "iso_1_code": null, "iso_3_code": "lzz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mingrelian", "iso_1_code": null, "iso_3_code": "xmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4613", + "native_tokenizers": [], "scripts": [ "Geor" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4611", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4605", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Katukinan.json b/data/Katukinan.json index 4edb529f496ea9258d587378e9d5caa595fac028..681e594749d8436c5d428602f5990f4cf7710c42 100644 --- a/data/Katukinan.json +++ b/data/Katukinan.json @@ -2,40 +2,40 @@ "name": "Katukinan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Katuk\u00edna", "iso_1_code": null, "iso_3_code": "kav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanamar\u00ed", "iso_1_code": null, "iso_3_code": "knm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4616", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Katawixi", "iso_1_code": null, "iso_3_code": "xat", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4617", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4614", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Kaure.json b/data/Kaure.json index 96b0eef1173301c1014c3082bf86225dc970ccae..2d279dbd623161284e7661de1d2f1a29a4f185fc 100644 --- a/data/Kaure.json +++ b/data/Kaure.json @@ -2,62 +2,62 @@ "name": "Kaure", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kapore", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kapauri", "iso_1_code": null, "iso_3_code": "khp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaure Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaure", "iso_1_code": null, "iso_3_code": "bpp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4622", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kosare", "iso_1_code": null, "iso_3_code": "kiq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4623", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Kaweskaran.json b/data/Kaweskaran.json index 0abf44a0878ab88fc7f58ea02556e7b783298764..2a043218581f387d6eaee3b49a0c20a5c2d13867 100644 --- a/data/Kaweskaran.json +++ b/data/Kaweskaran.json @@ -2,20 +2,20 @@ "name": "Kaweskaran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Qawasqar", "iso_1_code": null, "iso_3_code": "alc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4624", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Keresan.json b/data/Keresan.json index c96d553e3644c4de7c8df7706084e5dd8a9a66b0..8850316267f775ff7d1277179ae73617265abbd5 100644 --- a/data/Keresan.json +++ b/data/Keresan.json @@ -2,30 +2,30 @@ "name": "Keresan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Keres, Eastern", "iso_1_code": null, "iso_3_code": "kee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4627", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Keres, Western", "iso_1_code": null, "iso_3_code": "kjq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Khoe-Kwadi.json b/data/Khoe-Kwadi.json index 255d90df65380d453470d45368ced0854e07a07e..57163daee41163875131adf532f90d4b93a0b40e 100644 --- a/data/Khoe-Kwadi.json +++ b/data/Khoe-Kwadi.json @@ -2,254 +2,254 @@ "name": "Khoe-Kwadi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khoe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kalahari Khoe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "North-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Shua", "iso_1_code": null, "iso_3_code": "shg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4633", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northeast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tshuwau", "iso_1_code": null, "iso_3_code": "hio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kua", "iso_1_code": null, "iso_3_code": "tyu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northwest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "\u01c1Ani", "iso_1_code": null, "iso_3_code": "hnh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4638", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khwedam", "iso_1_code": null, "iso_3_code": "xuu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4637", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "\u01c1Gana", "iso_1_code": null, "iso_3_code": "gnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "\u01c0Gwi", "iso_1_code": null, "iso_3_code": "gwj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4642", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naro", "iso_1_code": null, "iso_3_code": "nhr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4643", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khoekhoe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hainum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hai|\u01c1om", "iso_1_code": null, "iso_3_code": "hgm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4646", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Korana", "iso_1_code": null, "iso_3_code": "kqz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khoekhoe", "iso_1_code": null, "iso_3_code": "naq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4649", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Xiri", "iso_1_code": null, "iso_3_code": "xii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4650", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwadi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwadi", "iso_1_code": null, "iso_3_code": "kwz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4652", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4629", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Kiowa-Tanoan.json b/data/Kiowa-Tanoan.json index 5682dec179bf0cfb864486f1ba17fcfd021527c9..0fe160c31cac67ae171475dec0f127f36345f410 100644 --- a/data/Kiowa-Tanoan.json +++ b/data/Kiowa-Tanoan.json @@ -2,83 +2,83 @@ "name": "Kiowa-Tanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kiowa", "iso_1_code": null, "iso_3_code": "kio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piro", "iso_1_code": null, "iso_3_code": "pie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tewa", "iso_1_code": null, "iso_3_code": "tew", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4656", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Jemez", "iso_1_code": null, "iso_3_code": "tow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tiwa, Southern", "iso_1_code": null, "iso_3_code": "tix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiwa, Northern", "iso_1_code": null, "iso_3_code": "twf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Koreanic.json b/data/Koreanic.json index 6cab6780901ed3b01d4e32f169c03676c3248d01..d3f01b285f0864f848ff1ad222b9ccb375bfe3a8 100644 --- a/data/Koreanic.json +++ b/data/Koreanic.json @@ -2,54 +2,50 @@ "name": "Koreanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Hang": { - "full_object": "KiwiTokenizer()", - "original_lang_name": "korean", - "original_lang_code": "kor", - "scripts": [ - "Hang" - ], - "class_name": "KiwiTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Jejueo", "iso_1_code": null, "iso_3_code": "jje", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korean", "iso_1_code": "ko", "iso_3_code": "kor", + "children": [], "tokenizers": { "Hang": { "full_object": "KiwiTokenizer()", "original_lang_name": "korean", "original_lang_code": "kor", - "scripts": [ - "Hang" - ], - "class_name": "KiwiTokenizer", - "macrolanguage": false + "script": "Hang", + "class_name": "KiwiTokenizer" } }, - "children": [], "node_i": "4663", - "scripts": [ + "native_tokenizers": [ "Hang" ], - "own_tokenizer": true + "scripts": [ + "Hang" + ] } ], + "tokenizers": { + "Hang": { + "full_object": "KiwiTokenizer()", + "original_lang_name": "korean", + "original_lang_code": "kor", + "script": "Hang", + "class_name": "KiwiTokenizer" + } + }, "node_i": "4661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Kra-Dai.json b/data/Kra-Dai.json index 7c37b66a8fc84e755e196b8730c4d22188fb2664..210d93a773f5ce3a3dfa3799ee99ca148d25bfeb 100644 --- a/data/Kra-Dai.json +++ b/data/Kra-Dai.json @@ -2,1130 +2,1114 @@ "name": "Kra-Dai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Thai": { - "full_object": "ThaiTokenizer()", - "original_lang_name": "thai", - "original_lang_code": "tha", - "scripts": [ - "Thai" - ], - "class_name": "ThaiTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nora", "iso_1_code": null, "iso_3_code": "nrr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hlai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jiamao", "iso_1_code": null, "iso_3_code": "jio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hlai", "iso_1_code": null, "iso_3_code": "lic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kam-Tai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Thai": { - "full_object": "ThaiTokenizer()", - "original_lang_name": "thai", - "original_lang_code": "tha", - "scripts": [ - "Thai" - ], - "class_name": "ThaiTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kam-Sui", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ai-Cham", "iso_1_code": null, "iso_3_code": "aih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Biao", "iso_1_code": null, "iso_3_code": "byk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chadong", "iso_1_code": null, "iso_3_code": "cdy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cao Miao", "iso_1_code": null, "iso_3_code": "cov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dong, Northern", "iso_1_code": null, "iso_3_code": "doc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dong, Southern", "iso_1_code": null, "iso_3_code": "kmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kang", "iso_1_code": null, "iso_3_code": "kyp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mak", "iso_1_code": null, "iso_3_code": "mkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mulam", "iso_1_code": null, "iso_3_code": "mlm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maonan", "iso_1_code": null, "iso_3_code": "mmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sui", "iso_1_code": null, "iso_3_code": "swi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "T\u2019en", "iso_1_code": null, "iso_3_code": "tct", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4682", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lakkja", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lakkia", "iso_1_code": null, "iso_3_code": "lbc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4684", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lingao", "iso_1_code": null, "iso_3_code": "onb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4683", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Thai": { - "full_object": "ThaiTokenizer()", - "original_lang_name": "thai", - "original_lang_code": "tha", - "scripts": [ - "Thai" - ], - "class_name": "ThaiTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tai Khang", "iso_1_code": null, "iso_3_code": "tnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4687", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai Pao", "iso_1_code": null, "iso_3_code": "tpo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4688", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai Yo", "iso_1_code": null, "iso_3_code": "tyj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuan", "iso_1_code": null, "iso_3_code": "uan", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4690", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cao Lan", "iso_1_code": null, "iso_3_code": "mlc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nung", "iso_1_code": null, "iso_3_code": "nut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ts\u2019\u00fcn-Lao", "iso_1_code": null, "iso_3_code": "tsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "T\u00e0y", "iso_1_code": null, "iso_3_code": "tyz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Minz", "iso_1_code": "za", "iso_3_code": "zgm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Dai", "iso_1_code": "za", "iso_3_code": "zhd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Nong", "iso_1_code": "za", "iso_3_code": "zhn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Yang", "iso_1_code": "za", "iso_3_code": "zyg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Yongnan", "iso_1_code": "za", "iso_3_code": "zyn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Zuojiang", "iso_1_code": "za", "iso_3_code": "zzj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4691", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bouyei", "iso_1_code": null, "iso_3_code": "pcc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saek", "iso_1_code": null, "iso_3_code": "skb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yoy", "iso_1_code": null, "iso_3_code": "yoy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Central Hongshuihe", "iso_1_code": "za", "iso_3_code": "zch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Eastern Hongshuihe", "iso_1_code": "za", "iso_3_code": "zeh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Guibei", "iso_1_code": "za", "iso_3_code": "zgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4708", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Guibian", "iso_1_code": "za", "iso_3_code": "zgn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Liujiang", "iso_1_code": "za", "iso_3_code": "zlj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4710", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Lianshan", "iso_1_code": "za", "iso_3_code": "zln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4711", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Liuqian", "iso_1_code": "za", "iso_3_code": "zlq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4712", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Qiubei", "iso_1_code": "za", "iso_3_code": "zqe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhuang, Yongbei", "iso_1_code": "za", "iso_3_code": "zyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4714", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zhuang, Youjiang", "iso_1_code": "za", "iso_3_code": "zyj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4715", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4702", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Thai": { - "full_object": "ThaiTokenizer()", - "original_lang_name": "thai", - "original_lang_code": "tha", - "scripts": [ - "Thai" - ], - "class_name": "ThaiTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ahom", "iso_1_code": null, "iso_3_code": "aho", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aiton", "iso_1_code": null, "iso_3_code": "aio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4718", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai Dam", "iso_1_code": null, "iso_3_code": "blt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4719", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tai Ya", "iso_1_code": null, "iso_3_code": "cuu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4720", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "L\u00fc", "iso_1_code": null, "iso_3_code": "khb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4721", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khamti", "iso_1_code": null, "iso_3_code": "kht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4722", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kh\u00fcn", "iso_1_code": null, "iso_3_code": "kkh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4723", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khamyang", "iso_1_code": null, "iso_3_code": "ksu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4724", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lao", "iso_1_code": "lo", "iso_3_code": "lao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4725", + "native_tokenizers": [], "scripts": [ "Laoo" - ], - "own_tokenizer": false + ] }, { "name": "Thai, Northern", "iso_1_code": null, "iso_3_code": "nod", + "children": [], "tokenizers": { "Thai": { "full_object": "ThaiTokenizer()", "original_lang_name": "thai", "original_lang_code": "tha", - "scripts": [ - "Thai" - ], - "class_name": "ThaiTokenizer", - "macrolanguage": false + "script": "Thai", + "class_name": "ThaiTokenizer" } }, - "children": [], "node_i": "4726", + "native_tokenizers": [], "scripts": [ "Thai" - ], - "own_tokenizer": false + ] }, { "name": "Nyaw", "iso_1_code": null, "iso_3_code": "nyw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pa Di", "iso_1_code": null, "iso_3_code": "pdi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4728", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phake", "iso_1_code": null, "iso_3_code": "phk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phu Thai", "iso_1_code": null, "iso_3_code": "pht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phuan", "iso_1_code": null, "iso_3_code": "phu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shan", "iso_1_code": null, "iso_3_code": "shn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4732", + "native_tokenizers": [], "scripts": [ "Mymr" - ], - "own_tokenizer": false + ] }, { "name": "Thai Song", "iso_1_code": null, "iso_3_code": "soa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thai, Southern", "iso_1_code": null, "iso_3_code": "sou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai N\u00fca", "iso_1_code": null, "iso_3_code": "tdd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thai", "iso_1_code": "th", "iso_3_code": "tha", + "children": [], "tokenizers": { "Thai": { "full_object": "ThaiTokenizer()", "original_lang_name": "thai", "original_lang_code": "tha", - "scripts": [ - "Thai" - ], - "class_name": "ThaiTokenizer", - "macrolanguage": false + "script": "Thai", + "class_name": "ThaiTokenizer" } }, - "children": [], "node_i": "4736", - "scripts": [ + "native_tokenizers": [ "Thai" ], - "own_tokenizer": true + "scripts": [ + "Thai" + ] }, { "name": "Tai Long", "iso_1_code": null, "iso_3_code": "thi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4737", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai Hongjin", "iso_1_code": null, "iso_3_code": "tiz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai Laing", "iso_1_code": null, "iso_3_code": "tjl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai Thanh", "iso_1_code": null, "iso_3_code": "tmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thai, Northeastern", "iso_1_code": null, "iso_3_code": "tts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai D\u00f3n", "iso_1_code": null, "iso_3_code": "twh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thu Lao", "iso_1_code": null, "iso_3_code": "tyl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4743", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai Daeng", "iso_1_code": null, "iso_3_code": "tyr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4744", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "T\u00e0y Sa Pa", "iso_1_code": null, "iso_3_code": "tys", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "T\u00e0y Tac", "iso_1_code": null, "iso_3_code": "tyt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4746", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yong", "iso_1_code": null, "iso_3_code": "yno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "script": "Thai", + "class_name": "ThaiTokenizer" + } + }, "node_i": "4716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "script": "Thai", + "class_name": "ThaiTokenizer" + } + }, "node_i": "4686", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "script": "Thai", + "class_name": "ThaiTokenizer" + } + }, "node_i": "4669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kra", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central Kra", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Buyang, Baha", "iso_1_code": null, "iso_3_code": "yha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Kra", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cun", "iso_1_code": null, "iso_3_code": "cuq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "En", "iso_1_code": null, "iso_3_code": "enc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Qabiao", "iso_1_code": null, "iso_3_code": "laq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laha", "iso_1_code": null, "iso_3_code": "lha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4755", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buyang, Langnian", "iso_1_code": null, "iso_3_code": "yln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yerong", "iso_1_code": null, "iso_3_code": "yrn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buyang, E\u2019ma", "iso_1_code": null, "iso_3_code": "yzg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4758", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Kra", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "A\u2019ou", "iso_1_code": null, "iso_3_code": "aou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4760", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gelao, Green", "iso_1_code": null, "iso_3_code": "giq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4761", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gelao, Red", "iso_1_code": null, "iso_3_code": "gir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4762", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mulao", "iso_1_code": null, "iso_3_code": "giu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duoluo", "iso_1_code": null, "iso_3_code": "giw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Qau", "iso_1_code": null, "iso_3_code": "gqu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4765", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lachi", "iso_1_code": null, "iso_3_code": "lbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4766", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lachi, White", "iso_1_code": null, "iso_3_code": "lwh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4767", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4759", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Thai": { + "full_object": "ThaiTokenizer()", + "original_lang_name": "thai", + "original_lang_code": "tha", + "script": "Thai", + "class_name": "ThaiTokenizer" + } + }, "node_i": "4664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Kuki-Chin-Naga.json b/data/Kuki-Chin-Naga.json index ac73b4a49e125058501c527d216b3ff96e33466e..8fe2aeb299b7d4f6c923de566a9dee680c290e36 100644 --- a/data/Kuki-Chin-Naga.json +++ b/data/Kuki-Chin-Naga.json @@ -2,9 +2,9 @@ "name": "Kuki-Chin-Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4768", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Kwomtari.json b/data/Kwomtari.json index 39c376abb2ac9b210c69a1067f3aaf046cf90e60..af21ec03ccd0e8268e949498fe2d6b4e4239b279 100644 --- a/data/Kwomtari.json +++ b/data/Kwomtari.json @@ -2,62 +2,62 @@ "name": "Kwomtari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Guriaso", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Muno", "iso_1_code": null, "iso_3_code": "grx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4771", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4770", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear Kwomtari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nai", "iso_1_code": null, "iso_3_code": "bio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4773", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwomtari", "iso_1_code": null, "iso_3_code": "kwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4774", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git "a/data/Kx\342\200\231a.json" "b/data/Kx\342\200\231a.json" index fb4f3535b0505a73b8df5d0939148926c6af0feb..006e0191b992fffdf092b92997faf6cb60dec49c 100644 --- "a/data/Kx\342\200\231a.json" +++ "b/data/Kx\342\200\231a.json" @@ -2,63 +2,63 @@ "name": "Kx\u2019a", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "\u01c2\u2019Amkhoe", "iso_1_code": null, "iso_3_code": "huc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "!Kung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kung-Ekoka", "iso_1_code": null, "iso_3_code": "knw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4778", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ju\u01c0\u2019hoansi", "iso_1_code": null, "iso_3_code": "ktz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4779", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Northwestern !Kung", "iso_1_code": null, "iso_3_code": "vaj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4780", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4777", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Lakes Plain.json b/data/Lakes Plain.json index 9d784e801578d7db814977d733ced82eb5934b1b..1329a0d8c7e97296351ea7cb2f5114beb1226e02 100644 --- a/data/Lakes Plain.json +++ b/data/Lakes Plain.json @@ -2,298 +2,298 @@ "name": "Lakes Plain", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awera", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awera", "iso_1_code": null, "iso_3_code": "awr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Foau", "iso_1_code": null, "iso_3_code": "flh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taburta", "iso_1_code": null, "iso_3_code": "tbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4786", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4784", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rasawa-Saponi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rasawa", "iso_1_code": null, "iso_3_code": "rac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4788", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saponi", "iso_1_code": null, "iso_3_code": "spi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tariku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Edopi", "iso_1_code": null, "iso_3_code": "dbf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4792", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iau", "iso_1_code": null, "iso_3_code": "tmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4793", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duvle", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Duvle", "iso_1_code": null, "iso_3_code": "duv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4794", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Obokuitai", "iso_1_code": null, "iso_3_code": "afz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4797", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Biritai", "iso_1_code": null, "iso_3_code": "bqq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eritai", "iso_1_code": null, "iso_3_code": "ert", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwerisa", "iso_1_code": null, "iso_3_code": "kkb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4800", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papasena", "iso_1_code": null, "iso_3_code": "pas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaiy", "iso_1_code": null, "iso_3_code": "tcq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Doutai", "iso_1_code": null, "iso_3_code": "tds", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sikaritai", "iso_1_code": null, "iso_3_code": "tty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4804", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waritai", "iso_1_code": null, "iso_3_code": "wbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4796", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fayu", "iso_1_code": null, "iso_3_code": "fau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4807", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kirikiri", "iso_1_code": null, "iso_3_code": "kiy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4808", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tause", "iso_1_code": null, "iso_3_code": "tad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4809", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4806", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Language isolate.json b/data/Language isolate.json index 2f34a8545c4589160bfabca4cc6e84edc4dd12d6..d670e2aebd3d1644fba8d140ebdc39efda9fceff 100644 --- a/data/Language isolate.json +++ b/data/Language isolate.json @@ -2,1256 +2,1210 @@ "name": "Language isolate", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"eu\")", - "original_lang_name": "basque", - "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ainu", "iso_1_code": null, "iso_3_code": "ain", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4811", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mpur", "iso_1_code": null, "iso_3_code": "akc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Andoque", "iso_1_code": null, "iso_3_code": "ano", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4813", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Atakapa", "iso_1_code": null, "iso_3_code": "aqp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4814", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arutani", "iso_1_code": null, "iso_3_code": "atx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waorani", "iso_1_code": null, "iso_3_code": "auc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4816", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Aushiri", "iso_1_code": null, "iso_3_code": "avs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4817", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Odiai", "iso_1_code": null, "iso_3_code": "bhf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Abinomn", "iso_1_code": null, "iso_3_code": "bsa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burushaski", "iso_1_code": null, "iso_3_code": "bsk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Callawalla", "iso_1_code": null, "iso_3_code": "caw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chiquitano", "iso_1_code": null, "iso_3_code": "cax", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4822", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Canichana", "iso_1_code": null, "iso_3_code": "caz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kandozi-Chapra", "iso_1_code": null, "iso_3_code": "cbu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4824", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cent\u00fa\u00fam", "iso_1_code": null, "iso_3_code": "cet", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4825", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chimariko", "iso_1_code": null, "iso_3_code": "cid", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cof\u00e1n", "iso_1_code": null, "iso_3_code": "con", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4827", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chitimacha", "iso_1_code": null, "iso_3_code": "ctm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cuitlatec", "iso_1_code": null, "iso_3_code": "cuy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cayubaba", "iso_1_code": null, "iso_3_code": "cyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangime", "iso_1_code": null, "iso_3_code": "dba", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Esselen", "iso_1_code": null, "iso_3_code": "esq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4832", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basque", "iso_1_code": "eu", "iso_3_code": "eus", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4833", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Iat\u00ea", "iso_1_code": null, "iso_3_code": "fun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laal", "iso_1_code": null, "iso_3_code": "gdm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tayap", "iso_1_code": null, "iso_3_code": "gpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4836", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guat\u00f3", "iso_1_code": null, "iso_3_code": "gta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hatam", "iso_1_code": null, "iso_3_code": "had", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4838", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hadza", "iso_1_code": null, "iso_3_code": "hts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ir\u00e1ntxe", "iso_1_code": null, "iso_3_code": "irn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itonama", "iso_1_code": null, "iso_3_code": "ito", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4841", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cams\u00e1", "iso_1_code": null, "iso_3_code": "kbh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4842", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kusunda", "iso_1_code": null, "iso_3_code": "kgg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Abun", "iso_1_code": null, "iso_3_code": "kgr", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4844", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Klamath-Modoc", "iso_1_code": null, "iso_3_code": "kla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kol", "iso_1_code": null, "iso_3_code": "kol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuot", "iso_1_code": null, "iso_3_code": "kto", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4847", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kutenai", "iso_1_code": null, "iso_3_code": "kut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kunza", "iso_1_code": null, "iso_3_code": "kuz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kano\u00e9", "iso_1_code": null, "iso_3_code": "kxo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karok", "iso_1_code": null, "iso_3_code": "kyh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karir\u00ed-Xoc\u00f3", "iso_1_code": null, "iso_3_code": "kzw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4852", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Leco", "iso_1_code": null, "iso_3_code": "lec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4853", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Molale", "iso_1_code": null, "iso_3_code": "mbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mawes", "iso_1_code": null, "iso_3_code": "mgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4855", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Elseng", "iso_1_code": null, "iso_3_code": "mrf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Massep", "iso_1_code": null, "iso_3_code": "mvs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4857", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muniche", "iso_1_code": null, "iso_3_code": "myr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4858", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Movima", "iso_1_code": null, "iso_3_code": "mzp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4859", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yale", "iso_1_code": null, "iso_3_code": "nce", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4860", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Natchez", "iso_1_code": null, "iso_3_code": "ncz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4861", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gilyak", "iso_1_code": null, "iso_3_code": "niv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nihali", "iso_1_code": null, "iso_3_code": "nll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mochica", "iso_1_code": null, "iso_3_code": "omc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Omurano", "iso_1_code": null, "iso_3_code": "omu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4865", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ofay\u00e9", "iso_1_code": null, "iso_3_code": "opy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4866", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oti", "iso_1_code": null, "iso_3_code": "oti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pankarar\u00fa", "iso_1_code": null, "iso_3_code": "paz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pyu", "iso_1_code": null, "iso_3_code": "pby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puelche", "iso_1_code": null, "iso_3_code": "pue", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puquina", "iso_1_code": null, "iso_3_code": "puq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4871", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rikbaktsa", "iso_1_code": null, "iso_3_code": "rkb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4872", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sandawe", "iso_1_code": null, "iso_3_code": "sad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4873", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seri", "iso_1_code": null, "iso_3_code": "sei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shasta", "iso_1_code": null, "iso_3_code": "sht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4875", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siuslaw", "iso_1_code": null, "iso_3_code": "sis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Salinan", "iso_1_code": null, "iso_3_code": "sln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4877", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sap\u00e9", "iso_1_code": null, "iso_3_code": "spc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4878", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sulka", "iso_1_code": null, "iso_3_code": "sua", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4879", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Aikan\u00e3", "iso_1_code": null, "iso_3_code": "tba", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4880", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ticuna", "iso_1_code": null, "iso_3_code": "tca", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4881", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Taruma", "iso_1_code": null, "iso_3_code": "tdm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4882", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Timucua", "iso_1_code": null, "iso_3_code": "tjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4883", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Trumai", "iso_1_code": null, "iso_3_code": "tpy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4884", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tonkawa", "iso_1_code": null, "iso_3_code": "tqw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4885", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taushiro", "iso_1_code": null, "iso_3_code": "trr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4886", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tux\u00e1", "iso_1_code": null, "iso_3_code": "tud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4887", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunica", "iso_1_code": null, "iso_3_code": "tun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4888", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uamu\u00e9", "iso_1_code": null, "iso_3_code": "uam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4889", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Urarina", "iso_1_code": null, "iso_3_code": "ura", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4890", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Vilela", "iso_1_code": null, "iso_3_code": "vil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4891", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Washo", "iso_1_code": null, "iso_3_code": "was", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4892", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warao", "iso_1_code": null, "iso_3_code": "wba", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4893", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "M\u00e1ku", "iso_1_code": null, "iso_3_code": "xak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4894", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cayuse", "iso_1_code": null, "iso_3_code": "xcy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4895", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xinca", "iso_1_code": null, "iso_3_code": "xin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4896", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xukur\u00fa", "iso_1_code": null, "iso_3_code": "xoo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4897", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tasmanian, Northeastern", "iso_1_code": null, "iso_3_code": "xpb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tasmanian, Oyster Bay", "iso_1_code": null, "iso_3_code": "xpd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tasmanian, Southeast", "iso_1_code": null, "iso_3_code": "xpf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tasmanian, North Midlands", "iso_1_code": null, "iso_3_code": "xph", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4901", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tasmanian, Port Sorell", "iso_1_code": null, "iso_3_code": "xpl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4902", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tasmanian, Northern", "iso_1_code": null, "iso_3_code": "xpv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4903", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tasmanian, Northwestern", "iso_1_code": null, "iso_3_code": "xpw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4904", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tasmanian, Southwestern", "iso_1_code": null, "iso_3_code": "xpx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4905", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tasmanian, Bruny Island", "iso_1_code": null, "iso_3_code": "xpz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwaza", "iso_1_code": null, "iso_3_code": "xwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Y\u00e1mana", "iso_1_code": null, "iso_3_code": "yag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4908", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hod\u00ef", "iso_1_code": null, "iso_3_code": "yau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4909", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yana", "iso_1_code": null, "iso_3_code": "ynn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4910", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yuchi", "iso_1_code": null, "iso_3_code": "yuc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4911", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yuracare", "iso_1_code": null, "iso_3_code": "yuz", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"eu\")", "original_lang_name": "basque", "original_lang_code": "eus", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "4912", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Karankawa", "iso_1_code": null, "iso_3_code": "zkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zuni", "iso_1_code": null, "iso_3_code": "zun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4914", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"eu\")", + "original_lang_name": "basque", + "original_lang_code": "eus", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "4810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Lencan.json b/data/Lencan.json index 775ab43a69f0fcf2935aed50dd36bd00f468bfc5..9f43dda358ff176ece7dc2148c41a0a3e85abbdb 100644 --- a/data/Lencan.json +++ b/data/Lencan.json @@ -2,20 +2,20 @@ "name": "Lencan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lenca", "iso_1_code": null, "iso_3_code": "len", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Lower Mamberamo.json b/data/Lower Mamberamo.json index daec1ff124f57da93c2d3930b8ed9c4980df2819..ef3488ad22f56ea41eb57c2e7e67890bda4e8144 100644 --- a/data/Lower Mamberamo.json +++ b/data/Lower Mamberamo.json @@ -2,30 +2,30 @@ "name": "Lower Mamberamo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Warembori", "iso_1_code": null, "iso_3_code": "wsa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yoke", "iso_1_code": null, "iso_3_code": "yki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Maiduan.json b/data/Maiduan.json index 9b7732c55544daca151c8630ac55d1284faa4672..ef816215c01ece85cd6c03aef29fb9c10de2f736 100644 --- a/data/Maiduan.json +++ b/data/Maiduan.json @@ -2,61 +2,61 @@ "name": "Maiduan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maidu, Northwest", "iso_1_code": null, "iso_3_code": "mjd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nisenan", "iso_1_code": null, "iso_3_code": "nsz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maidu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maidu, Northeast", "iso_1_code": null, "iso_3_code": "nmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4924", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maidu, Valley", "iso_1_code": null, "iso_3_code": "vmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4925", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Maipurean.json b/data/Maipurean.json index 8d161b1d965323bb2161d176207817ed16c198f1..2003a8eb48ffa9e56f499fa163dfae31a1afc4cb 100644 --- a/data/Maipurean.json +++ b/data/Maipurean.json @@ -2,993 +2,993 @@ "name": "Maipurean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Palikur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Palik\u00far", "iso_1_code": null, "iso_3_code": "plu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4930", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4929", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4928", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maritime", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ta-Maipurean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arawak", "iso_1_code": null, "iso_3_code": "arw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4933", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wayuu", "iso_1_code": null, "iso_3_code": "guc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4934", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Paraujano", "iso_1_code": null, "iso_3_code": "pbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4935", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ta\u00edno", "iso_1_code": null, "iso_3_code": "tnq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4936", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "I\u00f1eri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Garifuna", "iso_1_code": null, "iso_3_code": "cab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4938", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Island Carib", "iso_1_code": null, "iso_3_code": "crb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4939", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4937", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4932", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wapixana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Atorada", "iso_1_code": null, "iso_3_code": "aox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mapidian", "iso_1_code": null, "iso_3_code": "mpw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wapishana", "iso_1_code": null, "iso_3_code": "wap", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4943", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Upper Amazon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Res\u00edgaro", "iso_1_code": null, "iso_3_code": "rgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4945", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Upper Amazon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bar\u00e9", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bar\u00e9", "iso_1_code": null, "iso_3_code": "bae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4947", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yavitero", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baniva", "iso_1_code": null, "iso_3_code": "bvv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4950", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yavitero", "iso_1_code": null, "iso_3_code": "yvt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4951", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4946", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Nawiki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tariana", "iso_1_code": null, "iso_3_code": "tae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4953", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baniwa", "iso_1_code": null, "iso_3_code": "bwi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4955", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Curripaco", "iso_1_code": null, "iso_3_code": "kpc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4956", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4954", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4952", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Xiri\u00e2na", "iso_1_code": null, "iso_3_code": "xir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4958", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaba\u00e2na", "iso_1_code": null, "iso_3_code": "ybn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4957", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Nawiki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cabiyar\u00ed", "iso_1_code": null, "iso_3_code": "cbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4961", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yucuna", "iso_1_code": null, "iso_3_code": "ycn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4962", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Piapoco", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Achagua", "iso_1_code": null, "iso_3_code": "aca", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4964", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Piapoco", "iso_1_code": null, "iso_3_code": "pio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4965", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4963", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warekena", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Guarequena", "iso_1_code": null, "iso_3_code": "gae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4967", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandahuaca", "iso_1_code": null, "iso_3_code": "mht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4968", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4966", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Campa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ash\u00e1ninka", "iso_1_code": null, "iso_3_code": "cni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4971", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ash\u00e9ninga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ash\u00e9ninka, Pajonal", "iso_1_code": null, "iso_3_code": "cjo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4973", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nanti", "iso_1_code": null, "iso_3_code": "cox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ash\u00e9ninka, Ucayali-Yur\u00faa", "iso_1_code": null, "iso_3_code": "cpb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4975", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ajy\u00edninka Apurucayali", "iso_1_code": null, "iso_3_code": "cpc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4976", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ash\u00e9ninka, Pichis", "iso_1_code": null, "iso_3_code": "cpu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4977", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ash\u00e9ninka, South Ucayali", "iso_1_code": null, "iso_3_code": "cpy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4978", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nomatsigenga", "iso_1_code": null, "iso_3_code": "not", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4979", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ash\u00e9ninka, Peren\u00e9", "iso_1_code": null, "iso_3_code": "prq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4980", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4972", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Machiguenga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Caquinte", "iso_1_code": null, "iso_3_code": "cot", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4982", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Matsigenka", "iso_1_code": null, "iso_3_code": "mcb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4983", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4981", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pares\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Parec\u00eds", "iso_1_code": null, "iso_3_code": "pab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4986", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Saraveca", "iso_1_code": null, "iso_3_code": "sar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4987", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4985", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waur\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mehin\u00e1ku", "iso_1_code": null, "iso_3_code": "mmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waur\u00e1-Meinaku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Waur\u00e1", "iso_1_code": null, "iso_3_code": "wau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4991", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yawalapit\u00ed", "iso_1_code": null, "iso_3_code": "yaw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4988", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Outlier", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mojo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baure", "iso_1_code": null, "iso_3_code": "brg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4995", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paunaka", "iso_1_code": null, "iso_3_code": "pnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4996", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mojo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ignaciano", "iso_1_code": null, "iso_3_code": "ign", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4998", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Trinitario", "iso_1_code": null, "iso_3_code": "trn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "4999", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "4997", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Apurin\u00e3", "iso_1_code": null, "iso_3_code": "apu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5001", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "I\u00f1apari", "iso_1_code": null, "iso_3_code": "inp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5002", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mashco Piro", "iso_1_code": null, "iso_3_code": "cuj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Machinere", "iso_1_code": null, "iso_3_code": "mpd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5005", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yine", "iso_1_code": null, "iso_3_code": "pib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5006", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5003", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Terena", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chan\u00e9", "iso_1_code": null, "iso_3_code": "caj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5008", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guana", "iso_1_code": null, "iso_3_code": "gqn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ter\u00eana", "iso_1_code": null, "iso_3_code": "ter", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5010", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5007", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4993", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yanesha\u2019", "iso_1_code": null, "iso_3_code": "ame", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5012", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chamicuro", "iso_1_code": null, "iso_3_code": "ccc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5013", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5011", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mawayana", "iso_1_code": null, "iso_3_code": "mzx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Enawen\u00e9-Naw\u00e9", "iso_1_code": null, "iso_3_code": "unk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5016", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5014", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "4926", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Mairasi.json b/data/Mairasi.json index 441a8d19bc3f84a196cca6e8a5cf97e6ef06e3ae..9e35457c6e0c328c533df475a5c44b60104dab62 100644 --- a/data/Mairasi.json +++ b/data/Mairasi.json @@ -2,40 +2,40 @@ "name": "Mairasi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Semimi", "iso_1_code": null, "iso_3_code": "etz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mer", "iso_1_code": null, "iso_3_code": "mnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5019", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mairasi", "iso_1_code": null, "iso_3_code": "zrs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Mapudungu.json b/data/Mapudungu.json index 6688ab007793c0cb9b453ca9da047aefbece3d4e..5458daf5abe81c369d794bfb6b36c587b5c92f17 100644 --- a/data/Mapudungu.json +++ b/data/Mapudungu.json @@ -2,32 +2,32 @@ "name": "Mapudungu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mapudungun", "iso_1_code": null, "iso_3_code": "arn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5022", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Huilliche", "iso_1_code": null, "iso_3_code": "huh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5023", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5021", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Mascoyan.json b/data/Mascoyan.json index 50b5b1b81b532a2dce7e80adb3a1df535d20ff05..617813cbc2a7964524b1694e4a84bb0fa000f501 100644 --- a/data/Mascoyan.json +++ b/data/Mascoyan.json @@ -2,85 +2,85 @@ "name": "Mascoyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Angait\u00e9", "iso_1_code": null, "iso_3_code": "aqt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5025", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Enlhet", "iso_1_code": null, "iso_3_code": "enl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5026", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Enxet", "iso_1_code": null, "iso_3_code": "enx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5027", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guana", "iso_1_code": null, "iso_3_code": "gva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5028", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanapan\u00e1", "iso_1_code": null, "iso_3_code": "spn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5029", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mascoy", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Toba-Maskoy", "iso_1_code": null, "iso_3_code": "tmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5030", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5024", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Matacoan.json b/data/Matacoan.json index d40054d9a0d186621c009507cdb35a1ccf0447aa..b234cb51c14d625c646a75fb75c7e192e73c8722 100644 --- a/data/Matacoan.json +++ b/data/Matacoan.json @@ -2,114 +2,114 @@ "name": "Matacoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nivacl\u00e9", "iso_1_code": null, "iso_3_code": "cag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5033", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Maka", "iso_1_code": null, "iso_3_code": "mca", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5034", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chorote", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chorote, Iyo\u2019wujwa", "iso_1_code": null, "iso_3_code": "crq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chorote, Iyojwa\u2019ja", "iso_1_code": null, "iso_3_code": "crt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5037", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mataco", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Weenhayek", "iso_1_code": null, "iso_3_code": "mtp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5039", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pilcomayo Wich\u00ed", "iso_1_code": null, "iso_3_code": "mzh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5040", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bermejo Wich\u00ed", "iso_1_code": null, "iso_3_code": "wlv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5041", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Maxakalian.json b/data/Maxakalian.json index 345b7743b6c9bcbbb29d927600ca4ef29b4c79c9..d2959a6097172802a6348c651c9f709d2f9ba6d0 100644 --- a/data/Maxakalian.json +++ b/data/Maxakalian.json @@ -2,32 +2,32 @@ "name": "Maxakalian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maxakal\u00ed", "iso_1_code": null, "iso_3_code": "mbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5043", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Patax\u00f3 H\u00e3-Ha-H\u00e3e", "iso_1_code": null, "iso_3_code": "pth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5044", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5042", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Mayan.json b/data/Mayan.json index 630427a30962a8274daec5b946fc39d38db14ca4..78120e63120b7bea6aa919501d8f8b5b86c43050 100644 --- a/data/Mayan.json +++ b/data/Mayan.json @@ -2,700 +2,700 @@ "name": "Mayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Huastecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chicomuceltec", "iso_1_code": null, "iso_3_code": "cob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5047", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Huastec", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Huastec", "iso_1_code": null, "iso_3_code": "hus", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5049", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5048", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yucatecan-Core Mayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Core Mayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cholan-Tzeltalan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cholan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chol-Chontal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chontal, Tabasco", "iso_1_code": null, "iso_3_code": "chf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5055", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chol", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chol", "iso_1_code": null, "iso_3_code": "ctu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5057", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5056", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chorti-Cholti", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ch\u2019orti\u2019", "iso_1_code": null, "iso_3_code": "caa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5059", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5058", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tzeltalan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tzeltal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tzeltal", "iso_1_code": null, "iso_3_code": "tzh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5062", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5061", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tzotzil", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tzotzil", "iso_1_code": null, "iso_3_code": "tzo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5064", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5063", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5052", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5051", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "K\u2019ichean-Mamean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "K\u2019ichean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Q\ua78ceqchi\ua78c", "iso_1_code": null, "iso_3_code": "kek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5067", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Uspanteko", "iso_1_code": null, "iso_3_code": "usp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5068", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Poqom-K\u2019ichean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Core K\u2019ichean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Achi", "iso_1_code": null, "iso_3_code": "acr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5071", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "K\u2019iche\u2019", "iso_1_code": null, "iso_3_code": "quc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5072", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sipakapense", "iso_1_code": null, "iso_3_code": "qum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5073", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sakapulteko", "iso_1_code": null, "iso_3_code": "quv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5074", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaqchikel-Tz\u2019utujil", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaqchikel", "iso_1_code": null, "iso_3_code": "cak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5076", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tz\u2019utujil", "iso_1_code": null, "iso_3_code": "tzj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5077", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5075", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5070", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Poqom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Poqomam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Poqomam", "iso_1_code": null, "iso_3_code": "poc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5080", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Poqomchi\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Poqomchi\u2019", "iso_1_code": null, "iso_3_code": "poh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5082", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5081", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5078", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5069", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5066", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mamean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awakateko-Ixil", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awakateko", "iso_1_code": null, "iso_3_code": "agu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5085", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ixil", "iso_1_code": null, "iso_3_code": "ixl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5086", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teco-Mam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mam", "iso_1_code": null, "iso_3_code": "mam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5088", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tektiteko", "iso_1_code": null, "iso_3_code": "ttc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5089", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5087", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5083", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5065", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Q\u2019anjob\u2019alan-Chujean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chujean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chuj", "iso_1_code": null, "iso_3_code": "cac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5092", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tojolabal", "iso_1_code": null, "iso_3_code": "toj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5093", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5091", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Q\u2019anjob\u2019alan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mocho", "iso_1_code": null, "iso_3_code": "mhc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5095", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Q\u2019anjob\u2019al-Akateko-Jakalteko", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jakalteko", "iso_1_code": null, "iso_3_code": "jac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5097", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Q\u2019anjob\u2019al", "iso_1_code": null, "iso_3_code": "kjb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5098", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Akateko", "iso_1_code": null, "iso_3_code": "knj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5099", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yucatecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mopan-Itz\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Itza\u2019", "iso_1_code": null, "iso_3_code": "itz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5102", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maya, Mop\u00e1n", "iso_1_code": null, "iso_3_code": "mop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5103", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yucatec-Lacandon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lacandon", "iso_1_code": null, "iso_3_code": "lac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5105", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Maya, Yucatec", "iso_1_code": null, "iso_3_code": "yua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5106", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5045", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Maybrat.json b/data/Maybrat.json index 6a6957c52371729622c6b5416f897c8362d25356..3fe322a32addb88b81df89ce341d94a015e91d71 100644 --- a/data/Maybrat.json +++ b/data/Maybrat.json @@ -2,30 +2,30 @@ "name": "Maybrat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mai Brat", "iso_1_code": null, "iso_3_code": "ayz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5108", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karon Dori", "iso_1_code": null, "iso_3_code": "kgw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5109", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Misumalpan.json b/data/Misumalpan.json index 38790e7173c5885c074586f67ead19274c7d976a..43b025c2d2b9c35a7bd74468d4ec5c226e31c4ef 100644 --- a/data/Misumalpan.json +++ b/data/Misumalpan.json @@ -2,86 +2,86 @@ "name": "Misumalpan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "M\u00edskito", "iso_1_code": null, "iso_3_code": "miq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5111", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ulwa", "iso_1_code": null, "iso_3_code": "ulw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5112", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mayangna", "iso_1_code": null, "iso_3_code": "yan", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5113", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sumu-Cacaopera-Matagalpa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cacaopera-Matagalpa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cacaopera", "iso_1_code": null, "iso_3_code": "ccr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5116", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matagalpa", "iso_1_code": null, "iso_3_code": "mtn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5115", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Miwok-Costanoan.json b/data/Miwok-Costanoan.json index 2725f5e5f0706b7eed276957a51fa83833c27708..11fbc2601c15a2ba6a5c3c4126403e5d70f9b78c 100644 --- a/data/Miwok-Costanoan.json +++ b/data/Miwok-Costanoan.json @@ -2,165 +2,165 @@ "name": "Miwok-Costanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Costanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ohlone, Southern", "iso_1_code": null, "iso_3_code": "css", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ohlone, Northern", "iso_1_code": null, "iso_3_code": "cst", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karkin", "iso_1_code": null, "iso_3_code": "krb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miwokan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern Miwokan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bay Miwok", "iso_1_code": null, "iso_3_code": "mkq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5125", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miwok, Plains", "iso_1_code": null, "iso_3_code": "pmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sierra Miwok", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Miwok, Central Sierra", "iso_1_code": null, "iso_3_code": "csm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miwok, Northern Sierra", "iso_1_code": null, "iso_3_code": "nsq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miwok, Southern Sierra", "iso_1_code": null, "iso_3_code": "skd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Miwokan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Miwok, Coast", "iso_1_code": null, "iso_3_code": "csi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miwok, Lake", "iso_1_code": null, "iso_3_code": "lmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5133", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Mixe-Zoquean.json b/data/Mixe-Zoquean.json index 7a531769ec3b3e4c5a929b8fb039b9e1af1d6459..7c10f957c717de13b124b34fa5649c28673e88c1 100644 --- a/data/Mixe-Zoquean.json +++ b/data/Mixe-Zoquean.json @@ -2,301 +2,301 @@ "name": "Mixe-Zoquean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mixean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Popoluca, Oluta", "iso_1_code": null, "iso_3_code": "plo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5136", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Popoluca, Sayula", "iso_1_code": null, "iso_3_code": "pos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5137", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Oaxaca Mixean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mixe, Totontepec", "iso_1_code": null, "iso_3_code": "mto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5139", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixe, North Central", "iso_1_code": null, "iso_3_code": "neq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixe, Quetzaltepec", "iso_1_code": null, "iso_3_code": "pxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5141", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lowland Mixe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mixe, Coatl\u00e1n", "iso_1_code": null, "iso_3_code": "mco", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5143", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixe, Isthmus", "iso_1_code": null, "iso_3_code": "mir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5144", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixe, Mazatl\u00e1n", "iso_1_code": null, "iso_3_code": "mzl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5145", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Midland Mixe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mixe, Juquila", "iso_1_code": null, "iso_3_code": "mxq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5147", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Highland Mixe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mixe, Tlahuitoltepec", "iso_1_code": null, "iso_3_code": "mxp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5149", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zoquean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zoque, Chimalapa", "iso_1_code": null, "iso_3_code": "zoh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chiapas Zoquean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zoque, Copainal\u00e1", "iso_1_code": null, "iso_3_code": "zoc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5153", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zoque, Francisco Le\u00f3n", "iso_1_code": null, "iso_3_code": "zos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5154", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Northeast Zoque", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zoque, Ray\u00f3n", "iso_1_code": null, "iso_3_code": "zor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5156", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5152", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gulf Zoquean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Popoluca, Highland", "iso_1_code": null, "iso_3_code": "poi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5158", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Popoluca, Texistepec", "iso_1_code": null, "iso_3_code": "poq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zoque, Tabasco", "iso_1_code": null, "iso_3_code": "zoq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5160", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5150", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Mixed language.json b/data/Mixed language.json index 050cccc558d3ef09bd25e62affc2db97773a3174..5a8b542084d432ef8f721294d0cfdeba298d5d27 100644 --- a/data/Mixed language.json +++ b/data/Mixed language.json @@ -2,519 +2,519 @@ "name": "Mixed language", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "E", "iso_1_code": null, "iso_3_code": "eee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "N\u2019Ko", "iso_1_code": null, "iso_3_code": "nqo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5163", + "native_tokenizers": [], "scripts": [ "Nkoo" - ], - "own_tokenizer": false + ] }, { "name": "Armenian-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lomavren", "iso_1_code": null, "iso_3_code": "rmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5164", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bantu-Cushitic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbugu", "iso_1_code": null, "iso_3_code": "mhd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5167", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5166", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basque-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Erromintxela", "iso_1_code": null, "iso_3_code": "emx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cakchiquel-Quich\u00e9", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaqchikel-K\u2019iche\u2019 Mixed Language", "iso_1_code": null, "iso_3_code": "ckz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5170", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cebuano-Spanish-English", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eskayan", "iso_1_code": null, "iso_3_code": "esy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5173", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese-Tibetan-Bonan Mongour", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wutunhua", "iso_1_code": null, "iso_3_code": "wuh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5175", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Danish-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Traveller Danish", "iso_1_code": null, "iso_3_code": "rmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5176", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "English-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Angloromani", "iso_1_code": null, "iso_3_code": "rme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5179", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "French-Cree", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Michif", "iso_1_code": null, "iso_3_code": "crg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5180", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "German-Yiddish-Romani-Rotwelsch", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yeniche", "iso_1_code": null, "iso_3_code": "yec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5182", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Greek-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Romano-Greek", "iso_1_code": null, "iso_3_code": "rge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gurindji-Kriol", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gurindji Kriol", "iso_1_code": null, "iso_3_code": "gjr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5186", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iberian-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cal\u00f3", "iso_1_code": null, "iso_3_code": "rmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5189", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Irish-undocumented", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Shelta", "iso_1_code": null, "iso_3_code": "sth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kannada-Malayalam-Tamil", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chetti, Moundadan", "iso_1_code": null, "iso_3_code": "cty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5193", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Norwegian-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Norwegian, Traveller", "iso_1_code": null, "iso_3_code": "rmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Russian-Aleut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aleut, Mednyj", "iso_1_code": null, "iso_3_code": "mud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5196", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Serbian-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Romano-Serbian", "iso_1_code": null, "iso_3_code": "rsb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Songhay-Berber", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tagdal", "iso_1_code": null, "iso_3_code": "tda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Spanish-Quichua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Media Lengua", "iso_1_code": null, "iso_3_code": "mue", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5203", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5202", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Swedish-Romani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Romani, Tavringer", "iso_1_code": null, "iso_3_code": "rmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yapese-Ulithi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nguluwan", "iso_1_code": null, "iso_3_code": "nuw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zulu-Bantu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Camtho", "iso_1_code": null, "iso_3_code": "cmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5161", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Mongol-Langam.json b/data/Mongol-Langam.json index 12bcf2a7e9f069e5aadb8666d2fab78548748a60..133f30198cb7614653071d9e07afdfc2156fd4fa 100644 --- a/data/Mongol-Langam.json +++ b/data/Mongol-Langam.json @@ -2,40 +2,40 @@ "name": "Mongol-Langam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pondi", "iso_1_code": null, "iso_3_code": "lnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mwakai", "iso_1_code": null, "iso_3_code": "mgt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ulwa", "iso_1_code": null, "iso_3_code": "yla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5213", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Mongolic.json b/data/Mongolic.json index 761f8aa4a69db300fe14e80a0e62f86e4ddaaeb5..8bc79158dac17c4cf2650f2842f449723207ebe6 100644 --- a/data/Mongolic.json +++ b/data/Mongolic.json @@ -2,365 +2,337 @@ "name": "Mongolic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "StanzaTokenizer(\"bxr\")", - "original_lang_name": "russia_buriat", - "original_lang_code": "bxr", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "StanzaTokenizer(\"bxr\")", - "original_lang_name": "russia_buriat", - "original_lang_code": "bxr", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dagur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Daur", "iso_1_code": null, "iso_3_code": "dta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5217", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5216", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mongour", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kangjia", "iso_1_code": null, "iso_3_code": "kxs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5219", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tu", "iso_1_code": null, "iso_3_code": "mjg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5220", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bonan", "iso_1_code": null, "iso_3_code": "peh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5221", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dongxiang", "iso_1_code": null, "iso_3_code": "sce", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yugur, East", "iso_1_code": null, "iso_3_code": "yuy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oirat-Khalkha", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "StanzaTokenizer(\"bxr\")", - "original_lang_name": "russia_buriat", - "original_lang_code": "bxr", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Khamnigan Mongol", "iso_1_code": null, "iso_3_code": "ykh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khalkha-Buriat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "StanzaTokenizer(\"bxr\")", - "original_lang_name": "russia_buriat", - "original_lang_code": "bxr", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Buriat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "StanzaTokenizer(\"bxr\")", - "original_lang_name": "russia_buriat", - "original_lang_code": "bxr", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Buriat, Mongolia", "iso_1_code": null, "iso_3_code": "bxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buriat, Russia", "iso_1_code": null, "iso_3_code": "bxr", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"bxr\")", "original_lang_name": "russia_buriat", "original_lang_code": "bxr", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5229", - "scripts": [ + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl" + ] }, { "name": "Buriat, China", "iso_1_code": null, "iso_3_code": "bxu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5227", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mongolian Proper", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"bxr\")", "original_lang_name": "russia_buriat", "original_lang_code": "bxr", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, + "node_i": "5227", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mongolian Proper", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Mongolian, Halh", "iso_1_code": "mn", "iso_3_code": "khk", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"bxr\")", "original_lang_name": "russia_buriat", "original_lang_code": "bxr", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5232", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Mongolian, Peripheral", "iso_1_code": "mn", "iso_3_code": "mvf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5233", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5226", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Oirat-Kalmyk-Darkhat", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"bxr\")", "original_lang_name": "russia_buriat", "original_lang_code": "bxr", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, + "node_i": "5226", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Oirat-Kalmyk-Darkhat", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kalmyk-Oirat", "iso_1_code": null, "iso_3_code": "xal", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"bxr\")", "original_lang_name": "russia_buriat", "original_lang_code": "bxr", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5235", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5234", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5224", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mogholi", "iso_1_code": null, "iso_3_code": "mhj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"bxr\")", + "original_lang_name": "russia_buriat", + "original_lang_code": "bxr", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Mosetenan.json b/data/Mosetenan.json index 9d72f10dcf46137aad28abc6caca3665b8b36f2f..15f6a27708eed9c6f54bc84f49ac0fdcb7b1a989 100644 --- a/data/Mosetenan.json +++ b/data/Mosetenan.json @@ -2,22 +2,22 @@ "name": "Mosetenan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tsiman\u00e9", "iso_1_code": null, "iso_3_code": "cas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5239", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5238", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Muran.json b/data/Muran.json index c710770e68c724418969fa2b8149379a2afa6030..0927ed79a1f8722bf776f9a46e6097984bd535bc 100644 --- a/data/Muran.json +++ b/data/Muran.json @@ -2,20 +2,20 @@ "name": "Muran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pirah\u00e3", "iso_1_code": null, "iso_3_code": "myp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5241", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Muskogean.json b/data/Muskogean.json index 6e205c6d9c84a575b4052f936ae54d5025b08f04..8a52c4896e8d672a62f9a884b807e7ddb23a0c76 100644 --- a/data/Muskogean.json +++ b/data/Muskogean.json @@ -2,161 +2,161 @@ "name": "Muskogean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern Muskogean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central Muskogean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Apalachee-Alabama-Koasati", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Apalachee", "iso_1_code": null, "iso_3_code": "xap", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alabama-Koasati", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alabama", "iso_1_code": null, "iso_3_code": "akz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koasati", "iso_1_code": null, "iso_3_code": "cku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hitchiti-Mikasuki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mikasuki", "iso_1_code": null, "iso_3_code": "mik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Creek-Seminole", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Muskogee", "iso_1_code": null, "iso_3_code": "mus", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5253", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5252", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Muskogean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Choctaw", "iso_1_code": null, "iso_3_code": "cho", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5255", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chickasaw", "iso_1_code": null, "iso_3_code": "cic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5242", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Nakh-Daghestanian.json b/data/Nakh-Daghestanian.json index 52ef3e9521627ea3cba326bce3658062e1580aa9..151859b52a589d853b0fe89da5ee2d9b400e9103 100644 --- a/data/Nakh-Daghestanian.json +++ b/data/Nakh-Daghestanian.json @@ -2,549 +2,549 @@ "name": "Nakh-Daghestanian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Avar-Andic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Andic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akhvakh", "iso_1_code": null, "iso_3_code": "akv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Andi", "iso_1_code": null, "iso_3_code": "ani", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Botlikh", "iso_1_code": null, "iso_3_code": "bph", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chamalal", "iso_1_code": null, "iso_3_code": "cji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5263", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ghodoberi", "iso_1_code": null, "iso_3_code": "gdo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5264", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karata", "iso_1_code": null, "iso_3_code": "kpt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5265", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bagvalal", "iso_1_code": null, "iso_3_code": "kva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tindi", "iso_1_code": null, "iso_3_code": "tin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5259", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Avar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Avar", "iso_1_code": "av", "iso_3_code": "ava", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5269", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dargi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dargwa", "iso_1_code": null, "iso_3_code": "dar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5271", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Kubachi", "iso_1_code": null, "iso_3_code": "ugh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaitag", "iso_1_code": null, "iso_3_code": "xdq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5273", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khinalugh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khinalugh", "iso_1_code": null, "iso_3_code": "kjj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5275", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lak", "iso_1_code": null, "iso_3_code": "lbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5277", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lezgic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Archi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Archi", "iso_1_code": null, "iso_3_code": "aqc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5279", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear Lezgic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "East Lezgic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aghul", "iso_1_code": null, "iso_3_code": "agx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5283", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Lezgi", "iso_1_code": null, "iso_3_code": "lez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5284", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Tabasaran", "iso_1_code": null, "iso_3_code": "tab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5285", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Lezgic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Budukh", "iso_1_code": null, "iso_3_code": "bdk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5287", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kryts", "iso_1_code": null, "iso_3_code": "kry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5288", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Lezgic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rutul", "iso_1_code": null, "iso_3_code": "rut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5290", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsakhur", "iso_1_code": null, "iso_3_code": "tkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5291", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Udi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Udi", "iso_1_code": null, "iso_3_code": "udi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nakh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Batsi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bats", "iso_1_code": null, "iso_3_code": "bbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chechen-Ingush", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chechen", "iso_1_code": "ce", "iso_3_code": "che", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5298", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Ingush", "iso_1_code": null, "iso_3_code": "inh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5299", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5297", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5294", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsezic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "East Tsezic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hunzib", "iso_1_code": null, "iso_3_code": "huz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bezhta", "iso_1_code": null, "iso_3_code": "kap", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5303", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "5301", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Tsezic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dido", "iso_1_code": null, "iso_3_code": "ddo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hinukh", "iso_1_code": null, "iso_3_code": "gin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khvarshi", "iso_1_code": null, "iso_3_code": "khv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5307", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Nambikwara.json b/data/Nambikwara.json index 8090852b5e04ee5aa5babdc09dab5c1ecbc4d0f6..51b7a709d40d2aa7b7f2239c64f362303f23df65 100644 --- a/data/Nambikwara.json +++ b/data/Nambikwara.json @@ -2,125 +2,125 @@ "name": "Nambikwara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Saban\u00ea", "iso_1_code": null, "iso_3_code": "sae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nambikwara Complex", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nambiku\u00e1ra, Southern", "iso_1_code": null, "iso_3_code": "nab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5311", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alapmunte", "iso_1_code": null, "iso_3_code": "apv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mamaind\u00ea", "iso_1_code": null, "iso_3_code": "wmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5314", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yalakalore", "iso_1_code": null, "iso_3_code": "xyl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5315", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roosevelt Cluster", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lakond\u00ea", "iso_1_code": null, "iso_3_code": "lkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5317", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Latund\u00ea", "iso_1_code": null, "iso_3_code": "ltn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tawand\u00ea", "iso_1_code": null, "iso_3_code": "xtw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5310", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5308", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Niger-Congo.json b/data/Niger-Congo.json index 905a6acb831bc09dd4a9d46cab2ca0573ba73313..3acd3160af9fd2465f05c6373fd86a45fd287cfe 100644 --- a/data/Niger-Congo.json +++ b/data/Niger-Congo.json @@ -2,36007 +2,33567 @@ "name": "Niger-Congo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Atlantic-Congo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Atlantic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bijago", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bijag\u00f3", "iso_1_code": null, "iso_3_code": "bjg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Balant-Ganja", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Balanta-Ganja", "iso_1_code": null, "iso_3_code": "bjt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Balanta", "iso_1_code": null, "iso_3_code": "ble", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jola", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bayot", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bayot", "iso_1_code": null, "iso_3_code": "bda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jola Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Jola Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gusilay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bandial", "iso_1_code": null, "iso_3_code": "bqj", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5336", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gusilay", "iso_1_code": null, "iso_3_code": "gsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Her-Ejamat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jola-Felupe", "iso_1_code": null, "iso_3_code": "eja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kerak", "iso_1_code": null, "iso_3_code": "hhr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jola-Fonyi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Jola-Fonyi", "iso_1_code": null, "iso_3_code": "dyo", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5342", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5341", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Jola-Kasa", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5341", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Jola-Kasa", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Jola-Kasa", "iso_1_code": null, "iso_3_code": "csk", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5344", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5334", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Karon-Mlomp", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5334", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Karon-Mlomp", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Karon", "iso_1_code": null, "iso_3_code": "krx", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5346", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mlomp", "iso_1_code": null, "iso_3_code": "mlo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5345", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Kwatay", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5345", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Kwatay", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kuwaataay", "iso_1_code": null, "iso_3_code": "cwt", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5349", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5330", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Manjaku-Papel", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5330", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Manjaku-Papel", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Mankanya", "iso_1_code": null, "iso_3_code": "knf", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5351", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mandjak", "iso_1_code": null, "iso_3_code": "mfv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papel", "iso_1_code": null, "iso_3_code": "pbo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5326", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Cangin", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5326", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Cangin", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Laalaa", "iso_1_code": null, "iso_3_code": "cae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paloor", "iso_1_code": null, "iso_3_code": "fap", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndut", "iso_1_code": null, "iso_3_code": "ndv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saafi-Saafi", "iso_1_code": null, "iso_3_code": "sav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5358", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Noon", "iso_1_code": null, "iso_3_code": "snf", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5359", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5354", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Eastern Senegal-Guinea", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5354", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Eastern Senegal-Guinea", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Banyun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bainouk-Gunyu\u00f1o", "iso_1_code": null, "iso_3_code": "bab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bainouk-Samik", "iso_1_code": null, "iso_3_code": "bcb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5363", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bainouk-Gunyaamolo", "iso_1_code": null, "iso_3_code": "bcz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kasanga", "iso_1_code": null, "iso_3_code": "ccj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kobiana", "iso_1_code": null, "iso_3_code": "kcj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5367", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tenda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Biafada", "iso_1_code": null, "iso_3_code": "bif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5369", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oniyan", "iso_1_code": null, "iso_3_code": "bsc", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5370", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wamey", "iso_1_code": null, "iso_3_code": "cou", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5371", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Badyara", "iso_1_code": null, "iso_3_code": "pbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5372", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "M\u00e9nik", "iso_1_code": null, "iso_3_code": "tnr", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5373", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbulungish-Nalu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbulungish", "iso_1_code": null, "iso_3_code": "mbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5375", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nalu", "iso_1_code": null, "iso_3_code": "naj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5376", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5374", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Senegambian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Fula-Wolof", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Fula", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "East Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Fulfulde, Western Niger", "iso_1_code": "ff", "iso_3_code": "fuh", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5381", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Fulfulde, Central-Eastern Niger", "iso_1_code": "ff", "iso_3_code": "fuq", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5382", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Fulfulde, Nigerian", "iso_1_code": "ff", "iso_3_code": "fuv", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5383", + "native_tokenizers": [], "scripts": [ "Latn", "Arab" - ], - "own_tokenizer": false + ] } ], - "node_i": "5380", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Eastern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5380", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Eastern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Fulfulde, Adamawa", "iso_1_code": "ff", "iso_3_code": "fub", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5385", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Fulfulde, Bagirmi", "iso_1_code": "ff", "iso_3_code": "fui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5384", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "West Central", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5384", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "West Central", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Fulfulde, Maasina", "iso_1_code": "ff", "iso_3_code": "ffm", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5388", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Fulfulde, Borgu", "iso_1_code": "ff", "iso_3_code": "fue", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5389", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pular", "iso_1_code": "ff", "iso_3_code": "fuf", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5390", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5387", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pulaar", "iso_1_code": "ff", "iso_3_code": "fuc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5379", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Wolof", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5379", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Wolof", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Wolof, Gambian", "iso_1_code": null, "iso_3_code": "wof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5394", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wolof", "iso_1_code": "wo", "iso_3_code": "wol", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5395", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5393", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5378", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Serer", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5378", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Serer", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Serer-Sine", "iso_1_code": null, "iso_3_code": "srr", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5397", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5396", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5377", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5325", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5325", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Limba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Limba, West-Central", "iso_1_code": null, "iso_3_code": "lia", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5400", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Limba, East", "iso_1_code": null, "iso_3_code": "lma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5401", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5399", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mel", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5399", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mel", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bullom-Kissi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bullom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bom-Kim", "iso_1_code": null, "iso_3_code": "bmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bullom So", "iso_1_code": null, "iso_3_code": "buy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sherbro", "iso_1_code": null, "iso_3_code": "bun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5409", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5408", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5404", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kissi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kissi, Northern", "iso_1_code": null, "iso_3_code": "kqs", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5411", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kisi, Southern", "iso_1_code": null, "iso_3_code": "kss", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5412", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gola", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gola", "iso_1_code": null, "iso_3_code": "gol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5414", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Temne", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Baga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"wo\")", - "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Baga Pokur", "iso_1_code": null, "iso_3_code": "bcg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baga Koga", "iso_1_code": null, "iso_3_code": "bgo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baga Manduri", "iso_1_code": null, "iso_3_code": "bmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baga Kaloum", "iso_1_code": null, "iso_3_code": "bqf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baga Sitemu", "iso_1_code": null, "iso_3_code": "bsp", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5421", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Baga Soban\u00e9", "iso_1_code": null, "iso_3_code": "bsv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5422", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Landoma", "iso_1_code": null, "iso_3_code": "ldm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5416", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Temne-Banta", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "5416", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Temne-Banta", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Themne", "iso_1_code": null, "iso_3_code": "tem", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"wo\")", "original_lang_name": "wolof", - "original_lang_code": "wol", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "5425", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5415", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mansoanka", "iso_1_code": null, "iso_3_code": "msw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"wo\")", + "original_lang_name": "wolof", + "original_lang_code": "wol", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "5322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ijoid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Defaka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Defaka", "iso_1_code": null, "iso_3_code": "afn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5430", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ijo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ijo, Southeast", "iso_1_code": null, "iso_3_code": "ijs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nkoroo", "iso_1_code": null, "iso_3_code": "nkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ibani", "iso_1_code": null, "iso_3_code": "iby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5438", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalabari", "iso_1_code": null, "iso_3_code": "ijn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5439", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kirike", "iso_1_code": null, "iso_3_code": "okr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Inland Ijo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Biseni", "iso_1_code": null, "iso_3_code": "ije", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Okodia", "iso_1_code": null, "iso_3_code": "okd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oruma", "iso_1_code": null, "iso_3_code": "orr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5445", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Ijo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Izon", "iso_1_code": null, "iso_3_code": "ijc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5446", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5431", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Volta-Congo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Benue-Congo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Akpes", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akpes", "iso_1_code": null, "iso_3_code": "ibe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5450", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bantoid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dakoid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Samba Daka", "iso_1_code": null, "iso_3_code": "ccg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dirim", "iso_1_code": null, "iso_3_code": "dir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dong", "iso_1_code": null, "iso_3_code": "doh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5457", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamja-Dengsa-Tola", "iso_1_code": null, "iso_3_code": "ldh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gaa", "iso_1_code": null, "iso_3_code": "ttb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fam", "iso_1_code": null, "iso_3_code": "fam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5461", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mambiloid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mambila-Konja", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Konja", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwanja", "iso_1_code": null, "iso_3_code": "knp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Twendi", "iso_1_code": null, "iso_3_code": "twn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Magu-Kamkam-Kila", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbongno", "iso_1_code": null, "iso_3_code": "bgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Somyev", "iso_1_code": null, "iso_3_code": "kgt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5469", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mvanip", "iso_1_code": null, "iso_3_code": "mcj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndunda", "iso_1_code": null, "iso_3_code": "nuh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5471", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5467", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mambila", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mambila, Cameroon", "iso_1_code": null, "iso_3_code": "mcu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5473", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mambila, Nigeria", "iso_1_code": null, "iso_3_code": "mzk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5474", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5472", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Njerup", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Njerep", "iso_1_code": null, "iso_3_code": "njr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndoro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndoola", "iso_1_code": null, "iso_3_code": "ndr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5478", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5477", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suga-Vute", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Suga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nizaa", "iso_1_code": null, "iso_3_code": "sgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vute", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Vute", "iso_1_code": null, "iso_3_code": "vut", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5483", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wawa", "iso_1_code": null, "iso_3_code": "www", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" } - ], + }, "node_i": "5482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5453", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5453", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Beboid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Sari", "iso_1_code": null, "iso_3_code": "asj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5487", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbuk", "iso_1_code": null, "iso_3_code": "bpc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bukwen", "iso_1_code": null, "iso_3_code": "buz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naami", "iso_1_code": null, "iso_3_code": "bzv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5490", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chung", "iso_1_code": null, "iso_3_code": "cnq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kemedzung", "iso_1_code": null, "iso_3_code": "dmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mashi", "iso_1_code": null, "iso_3_code": "jms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naki", "iso_1_code": null, "iso_3_code": "mff", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5494", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nchane", "iso_1_code": null, "iso_3_code": "ncr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5495", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Noone", "iso_1_code": null, "iso_3_code": "nhu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5496", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5486", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ekoid", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5486", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ekoid", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ejagham", "iso_1_code": null, "iso_3_code": "etu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5498", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ndoe", "iso_1_code": null, "iso_3_code": "nbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5499", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bakor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Abanyom", "iso_1_code": null, "iso_3_code": "abm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5501", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ekajuk", "iso_1_code": null, "iso_3_code": "eka", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5502", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nkem-Nkum", "iso_1_code": null, "iso_3_code": "isi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nnam", "iso_1_code": null, "iso_3_code": "nbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5504", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nde-Nsele-Nta", "iso_1_code": null, "iso_3_code": "ndd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Efutop", "iso_1_code": null, "iso_3_code": "ofu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5500", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5497", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jarawan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cameroon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nagumi", "iso_1_code": null, "iso_3_code": "ngv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbonga", "iso_1_code": null, "iso_3_code": "xmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nigerian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbat", "iso_1_code": null, "iso_3_code": "bau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulung", "iso_1_code": null, "iso_3_code": "bbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bille", "iso_1_code": null, "iso_3_code": "bil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lame", "iso_1_code": null, "iso_3_code": "bma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duguri", "iso_1_code": null, "iso_3_code": "dbm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dulubu", "iso_1_code": null, "iso_3_code": "dbo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shiki", "iso_1_code": null, "iso_3_code": "gua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5518", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gwa", "iso_1_code": null, "iso_3_code": "gwb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5519", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gwak", "iso_1_code": null, "iso_3_code": "jgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bankal", "iso_1_code": null, "iso_3_code": "jjr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Labir", "iso_1_code": null, "iso_3_code": "jku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbula-Bwazza", "iso_1_code": null, "iso_3_code": "mbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5523", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mama", "iso_1_code": null, "iso_3_code": "mma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mamfe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Denya", "iso_1_code": null, "iso_3_code": "anv", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5526", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kenyang", "iso_1_code": null, "iso_3_code": "ken", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5527", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kendem", "iso_1_code": null, "iso_3_code": "kvm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbe", "iso_1_code": null, "iso_3_code": "mfo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5529", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Narrow Bantu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "D", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bembe-Buyi (D.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bembe", "iso_1_code": null, "iso_3_code": "bmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5535", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5534", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bembe-Buyi (D.55)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Buyu", "iso_1_code": null, "iso_3_code": "byi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5537", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5536", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.301)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kari", "iso_1_code": null, "iso_3_code": "kbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5538", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.302)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boguru", "iso_1_code": null, "iso_3_code": "bqu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.303)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngbinda", "iso_1_code": null, "iso_3_code": "nbd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5542", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.304)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Homa", "iso_1_code": null, "iso_3_code": "hom", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5544", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.305)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyanga-li", "iso_1_code": null, "iso_3_code": "nyc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5546", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.307)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mayeka", "iso_1_code": null, "iso_3_code": "myc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5548", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.308)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bodo", "iso_1_code": null, "iso_3_code": "boy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5551", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5550", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bhele", "iso_1_code": null, "iso_3_code": "bhy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5553", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.311)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bila", "iso_1_code": null, "iso_3_code": "bip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.312)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaiku", "iso_1_code": null, "iso_3_code": "kkq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5556", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bira", "iso_1_code": null, "iso_3_code": "brf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5559", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5558", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyali", "iso_1_code": null, "iso_3_code": "nlj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5560", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.331)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Vanuma", "iso_1_code": null, "iso_3_code": "vau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.332)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Budu", "iso_1_code": null, "iso_3_code": "buu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5565", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5564", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.333)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndaka", "iso_1_code": null, "iso_3_code": "ndk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5567", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5566", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.334)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbo", "iso_1_code": null, "iso_3_code": "zmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5569", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5568", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.335)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Beeke", "iso_1_code": null, "iso_3_code": "bkf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5571", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5570", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bira-Nyali (D.336)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngbee", "iso_1_code": null, "iso_3_code": "jgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5573", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5572", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.201)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lika", "iso_1_code": null, "iso_3_code": "lik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5574", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bali", "iso_1_code": null, "iso_3_code": "bcp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5577", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5576", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.211)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kango", "iso_1_code": null, "iso_3_code": "kzy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5578", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amba", "iso_1_code": null, "iso_3_code": "rwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5580", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Komo", "iso_1_code": null, "iso_3_code": "kmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5583", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5582", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Songoora", "iso_1_code": null, "iso_3_code": "sod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5585", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5584", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lega-Mwenga", "iso_1_code": null, "iso_3_code": "lgm", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5587", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5586", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Lega-Holoholo (D.251)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5586", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Lega-Holoholo (D.251)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Lega-Shabunda", "iso_1_code": null, "iso_3_code": "lea", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5589", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.251)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kanu", "iso_1_code": null, "iso_3_code": "khx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwami", "iso_1_code": null, "iso_3_code": "ktf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.26)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zimba", "iso_1_code": null, "iso_3_code": "zmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5593", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.27)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bangubangu", "iso_1_code": null, "iso_3_code": "bnx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5596", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5595", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lega-Holoholo (D.28)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Holoholo", "iso_1_code": null, "iso_3_code": "hoo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5597", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbole-Enya (D.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbole", "iso_1_code": null, "iso_3_code": "mdq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbole-Enya (D.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lengola", "iso_1_code": null, "iso_3_code": "lej", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5602", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5601", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbole-Enya (D.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mituku", "iso_1_code": null, "iso_3_code": "zmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbole-Enya (D.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Enya", "iso_1_code": null, "iso_3_code": "gey", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5605", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbolle-Enya (D.141)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zula", "iso_1_code": null, "iso_3_code": "zla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5608", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyanga (D.43)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyanga", "iso_1_code": null, "iso_3_code": "nyj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5610", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5533", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "E", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5533", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "E", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Chaga (E.621)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Machame", "iso_1_code": null, "iso_3_code": "jmc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5613", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Rwa", "iso_1_code": null, "iso_3_code": "rwk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5614", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5612", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Chaga (E.622)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5612", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Chaga (E.622)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Mochi", "iso_1_code": null, "iso_3_code": "old", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5616", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Vunjo", "iso_1_code": null, "iso_3_code": "vun", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5617", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chaga (E.623)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rombo", "iso_1_code": null, "iso_3_code": "rof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chaga (E.64)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kahe", "iso_1_code": null, "iso_3_code": "hka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chaga (E.65)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gweno", "iso_1_code": null, "iso_3_code": "gwe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5623", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5622", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikuyu-Kamba (E.51)", "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, + "iso_3_code": null, "children": [ { "name": "Gikuyu", "iso_1_code": "ki", "iso_3_code": "kik", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5625", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5624", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikuyu-Kamba (E.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kiembu", "iso_1_code": null, "iso_3_code": "ebu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5627", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikuyu-Kamba (E.53)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kim\u00ee\u00eeru", "iso_1_code": null, "iso_3_code": "mer", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5629", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikuyu-Kamba (E.531)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mwimbi-Muthambi", "iso_1_code": null, "iso_3_code": "mws", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikuyu-Kamba (E.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kitharaka", "iso_1_code": null, "iso_3_code": "thk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5633", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikuyu-Kamba (E.541)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gichuka", "iso_1_code": null, "iso_3_code": "cuh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikuyu-Kamba (E.55)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kamba", "iso_1_code": null, "iso_3_code": "kam", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5637", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikuyu-Kamba (E.56)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dhaiso", "iso_1_code": null, "iso_3_code": "dhs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5638", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyika-Taita (E.701)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kiwilwana", "iso_1_code": null, "iso_3_code": "mlk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyika-Taita (E.71)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kipfokomu", "iso_1_code": null, "iso_3_code": "pkb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5643", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5642", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyika-Taita (E.72)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5642", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyika-Taita (E.72)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Chichonyi-Chidzihana-Chikauma", "iso_1_code": null, "iso_3_code": "coh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chiduruma", "iso_1_code": null, "iso_3_code": "dug", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5646", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kigiryama", "iso_1_code": null, "iso_3_code": "nyf", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5647", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5644", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyika-Taita (E.73)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5644", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyika-Taita (E.73)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Chidigo", "iso_1_code": null, "iso_3_code": "dig", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5649", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyika-Taita (E.731)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Segeju", "iso_1_code": null, "iso_3_code": "seg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5650", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyika-Taita (E.74)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dawida", "iso_1_code": null, "iso_3_code": "dav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taveta", "iso_1_code": null, "iso_3_code": "tvs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5652", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyika-Taita (E.741)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sagalla", "iso_1_code": null, "iso_3_code": "tga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5656", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Temi (E.46)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Temi", "iso_1_code": null, "iso_3_code": "soz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5611", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "F", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5611", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "F", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ruwila", "iso_1_code": null, "iso_3_code": "rwl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nilamba-Rangi (F.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Isanzu", "iso_1_code": null, "iso_3_code": "isn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nilamba", "iso_1_code": null, "iso_3_code": "nim", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5663", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5661", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nilamba-Rangi (F.32)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5661", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nilamba-Rangi (F.32)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Nyaturu", "iso_1_code": null, "iso_3_code": "rim", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5665", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nilamba-Rangi (F.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rangi", "iso_1_code": null, "iso_3_code": "lag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nilamba-Rangi (F.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbugwe", "iso_1_code": null, "iso_3_code": "mgz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sukuma-Nyamwezi (F.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Sukuma", "iso_1_code": null, "iso_3_code": "suk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5671", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sukuma-Nyamwezi (F.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Konongo", "iso_1_code": null, "iso_3_code": "kcz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyamwezi", "iso_1_code": null, "iso_3_code": "nym", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sukuma-Nyamwezi (F.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sumbwa", "iso_1_code": null, "iso_3_code": "suw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sukuma-Nyamwezi (F.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kimbu", "iso_1_code": null, "iso_3_code": "kiv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sukuma-Nyamwezi (F.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bungu", "iso_1_code": null, "iso_3_code": "wun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tongwe-Bende (F.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tongwe", "iso_1_code": null, "iso_3_code": "tny", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5682", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tongwe-Bende (F.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bende", "iso_1_code": null, "iso_3_code": "bdp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5684", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5683", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5659", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "G", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5659", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "G", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bena-Kinga (G.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sangu", "iso_1_code": null, "iso_3_code": "sbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5687", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5686", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bena-Kinga (G.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Hehe", "iso_1_code": null, "iso_3_code": "heh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5689", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5688", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bena-Kinga (G.63)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bena", "iso_1_code": null, "iso_3_code": "bez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5691", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Benamanga", "iso_1_code": null, "iso_3_code": "egm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5690", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bena-Kinga (G.64)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pangwa", "iso_1_code": null, "iso_3_code": "pbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bena-Kinga (G.65)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kinga", "iso_1_code": null, "iso_3_code": "zga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bena-Kinga (G.651)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Magoma", "iso_1_code": null, "iso_3_code": "gmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bena-Kinga (G.66)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Vwanji", "iso_1_code": null, "iso_3_code": "wbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bena-Kinga (G.67)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kisi", "iso_1_code": null, "iso_3_code": "kiz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5702", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gogo-Kagulu (G.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gogo", "iso_1_code": null, "iso_3_code": "gog", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5704", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5703", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Gogo-Kagulu (G.12)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5703", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Gogo-Kagulu (G.12)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kagulu", "iso_1_code": null, "iso_3_code": "kki", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5706", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5705", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Pogolo-Ndamba (G.51)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5705", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Pogolo-Ndamba (G.51)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Pogolo", "iso_1_code": null, "iso_3_code": "poy", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5708", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5707", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Pogolo-Ndamba (G.52)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5707", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Pogolo-Ndamba (G.52)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ndamba", "iso_1_code": null, "iso_3_code": "ndj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5710", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shambala (G.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asu", "iso_1_code": null, "iso_3_code": "asa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5712", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5711", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shambala (G.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Shambala", "iso_1_code": null, "iso_3_code": "ksb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5714", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shambala (G.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bondei", "iso_1_code": null, "iso_3_code": "bou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5715", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { - "name": "Swahili (G.40)", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, + "name": "Swahili (G.40)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Swahili, Congo", "iso_1_code": "sw", "iso_3_code": "swc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5718", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Swahili (G.402)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Makwe", "iso_1_code": null, "iso_3_code": "ymk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5720", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5719", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Swahili (G.403)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mwani", "iso_1_code": null, "iso_3_code": "wmw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5722", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5721", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Swahili (G.42)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5721", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Swahili (G.42)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Swahili", "iso_1_code": "sw", "iso_3_code": "swh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5724", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5723", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Swahili (G.44)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5723", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Swahili (G.44)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Comorian, Maore", "iso_1_code": null, "iso_3_code": "swb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5726", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Comorian, Mwali", "iso_1_code": null, "iso_3_code": "wlc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Comorian, Ndzwani", "iso_1_code": null, "iso_3_code": "wni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5728", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Comorian, Ngazidja", "iso_1_code": null, "iso_3_code": "zdj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5729", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5725", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zigula-Zaramo (G.301)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Doe", "iso_1_code": null, "iso_3_code": "doe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zigula-Zaramo (G.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Zigula", "iso_1_code": null, "iso_3_code": "ziw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5733", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zigula-Zaramo (G.311)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mushungulu", "iso_1_code": null, "iso_3_code": "xma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zigula-Zaramo (G.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kwere", "iso_1_code": null, "iso_3_code": "cwe", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5737", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5736", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zigula-Zaramo (G.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zaramo", "iso_1_code": null, "iso_3_code": "zaj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zigula-Zaramo (G.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ngulu", "iso_1_code": null, "iso_3_code": "ngp", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5741", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5740", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Zigula-Zaramo (G.35)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5740", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Zigula-Zaramo (G.35)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Luguru", "iso_1_code": null, "iso_3_code": "ruf", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5743", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zigula-Zaramo (G.36)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kami", "iso_1_code": null, "iso_3_code": "kcu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5744", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zigula-Zaramo (G.37)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kutu", "iso_1_code": null, "iso_3_code": "kdc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5747", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5746", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Zigula-Zaramo (G.38)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5746", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Zigula-Zaramo (G.38)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Vidunda", "iso_1_code": null, "iso_3_code": "vid", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5749", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zigula-Zaramo (G.39)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sagala", "iso_1_code": null, "iso_3_code": "sbm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } - ], - "node_i": "5685", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "H", - "iso_1_code": null, - "iso_3_code": null, + ], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5685", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "H", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kikongo (H.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Beembe", "iso_1_code": null, "iso_3_code": "beq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5754", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikongo (H.112)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Doondo", "iso_1_code": null, "iso_3_code": "dde", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaamba", "iso_1_code": null, "iso_3_code": "xku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5755", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikongo (H.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Vili", "iso_1_code": null, "iso_3_code": "vif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5759", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5758", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikongo (H.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kunyi", "iso_1_code": null, "iso_3_code": "njx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5761", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5760", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikongo (H.131)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Suundi", "iso_1_code": null, "iso_3_code": "sdj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5762", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kikongo (H.16)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Koongo", "iso_1_code": "kg", "iso_3_code": "kng", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5765", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kikongo", "iso_1_code": "kg", "iso_3_code": "kwy", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5766", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Laari", "iso_1_code": "kg", "iso_3_code": "ldi", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5767", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kiyombe", "iso_1_code": null, "iso_3_code": "yom", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5768", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5764", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Kimbundu (H.21)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5764", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Kimbundu (H.21)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kimbundu", "iso_1_code": null, "iso_3_code": "kmb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5770", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mpinda", "iso_1_code": null, "iso_3_code": "pnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5771", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kimbundu (H.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kibala", "iso_1_code": null, "iso_3_code": "blv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5773", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kimbundu (H.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Songo", "iso_1_code": null, "iso_3_code": "nsx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5774", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbala-Hunganna (H.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbala", "iso_1_code": null, "iso_3_code": "mdp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5777", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbala-Hunganna (H.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hungana", "iso_1_code": null, "iso_3_code": "hum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5779", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5778", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaka (H.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lonzo", "iso_1_code": null, "iso_3_code": "lnz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pelende", "iso_1_code": null, "iso_3_code": "ppp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaka", "iso_1_code": null, "iso_3_code": "yaf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5780", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaka (H.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hungu", "iso_1_code": null, "iso_3_code": "hng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suku", "iso_1_code": null, "iso_3_code": "sub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5786", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5784", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaka (H.321)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sonde", "iso_1_code": null, "iso_3_code": "shc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5788", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaka (H.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbangala", "iso_1_code": null, "iso_3_code": "mxg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "J", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Haya-Jita (E.20)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Subi", "iso_1_code": null, "iso_3_code": "xsj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5793", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5792", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haya-Jita (E.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyambo", "iso_1_code": null, "iso_3_code": "now", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5794", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haya-Jita (E.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Haya", "iso_1_code": null, "iso_3_code": "hay", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5797", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5796", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haya-Jita (E.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zinza", "iso_1_code": null, "iso_3_code": "zin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haya-Jita (E.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kerewe", "iso_1_code": null, "iso_3_code": "ked", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5800", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haya-Jita (E.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jita", "iso_1_code": null, "iso_3_code": "jit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haya-Jita (E.251)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwaya", "iso_1_code": null, "iso_3_code": "kya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5804", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haya-Jita (E.252)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kara", "iso_1_code": null, "iso_3_code": "reg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5807", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5806", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konzo-Ndandi (D.40)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kobo", "iso_1_code": null, "iso_3_code": "okc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5809", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5808", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konzo-Ndandi (D.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Konzo", "iso_1_code": null, "iso_3_code": "koo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5811", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5810", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Konzo-Ndandi (D.42)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5810", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Konzo-Ndandi (D.42)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Nande", "iso_1_code": null, "iso_3_code": "nnb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5813", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.401)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngoreme", "iso_1_code": null, "iso_3_code": "ngq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5814", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.402)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ikizu", "iso_1_code": null, "iso_3_code": "ikz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5817", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5816", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.403)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Suba", "iso_1_code": null, "iso_3_code": "sxb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5819", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.405)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kabwa", "iso_1_code": null, "iso_3_code": "cwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.406)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Singa", "iso_1_code": null, "iso_3_code": "sgm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5822", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lulogooli", "iso_1_code": null, "iso_3_code": "rag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5825", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.411)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Luidakho-Luisukha-Lutirichi", "iso_1_code": null, "iso_3_code": "ida", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ekegusii", "iso_1_code": null, "iso_3_code": "guz", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5829", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5828", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Logooli-Kuria (E.43)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5828", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Logooli-Kuria (E.43)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kuria", "iso_1_code": null, "iso_3_code": "kuj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5831", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.431)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Suba-Simbiti", "iso_1_code": null, "iso_3_code": "ssc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5832", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zanaki", "iso_1_code": null, "iso_3_code": "zak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logooli-Kuria (E.45)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ikoma-Nata-Isenye", "iso_1_code": null, "iso_3_code": "ntk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5836", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masaba-Luhya (E.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bukusu", "iso_1_code": null, "iso_3_code": "bxk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lutachoni", "iso_1_code": null, "iso_3_code": "lts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masaaba", "iso_1_code": null, "iso_3_code": "myx", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5841", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5838", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Masaba-Luhya (E.32)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5838", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Masaba-Luhya (E.32)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Lukabaras", "iso_1_code": null, "iso_3_code": "lkb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Olushisa", "iso_1_code": null, "iso_3_code": "lks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5844", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Olumarama", "iso_1_code": null, "iso_3_code": "lrm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Olutsotso", "iso_1_code": null, "iso_3_code": "lto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oluwanga", "iso_1_code": null, "iso_3_code": "lwg", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5847", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nyala", "iso_1_code": null, "iso_3_code": "nle", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masaba-Luhya (E.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Olunyole", "iso_1_code": null, "iso_3_code": "nyd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masaba-Luhya (E.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Saamya-Gwe", "iso_1_code": null, "iso_3_code": "lsm", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5852", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masaba-Luhya (E.341)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Olukhayo", "iso_1_code": null, "iso_3_code": "lko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5853", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masaba-Luhya (E.342)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Olumarachi", "iso_1_code": null, "iso_3_code": "lri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5855", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masaba-Luhya (E.35)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nyole", "iso_1_code": null, "iso_3_code": "nuj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5858", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5857", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyoro-Ganda (E.101)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5857", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyoro-Ganda (E.101)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Gungu", "iso_1_code": null, "iso_3_code": "rub", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5860", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5859", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyoro-Ganda (E.102)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5859", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyoro-Ganda (E.102)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Talinga-Bwisi", "iso_1_code": null, "iso_3_code": "tlj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5862", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5861", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyoro-Ganda (E.103)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ruruuli-Runyala", "iso_1_code": null, "iso_3_code": "ruc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyoro-Ganda (E.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nyoro", "iso_1_code": null, "iso_3_code": "nyo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5866", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5865", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyoro-Ganda (E.12)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5865", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyoro-Ganda (E.12)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Tooro", "iso_1_code": null, "iso_3_code": "ttj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5868", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyoro-Ganda (E.121)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hema", "iso_1_code": null, "iso_3_code": "nix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyoro-Ganda (E.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nyankore", "iso_1_code": null, "iso_3_code": "nyn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5872", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5871", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyoro-Ganda (E.14)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5871", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyoro-Ganda (E.14)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Chiga", "iso_1_code": null, "iso_3_code": "cgg", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5874", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5873", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyoro-Ganda (E.15)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5873", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyoro-Ganda (E.15)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ganda", "iso_1_code": "lg", "iso_3_code": "lug", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5876", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], - "node_i": "5875", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyoro-Ganda (E.16)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5875", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyoro-Ganda (E.16)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kenye", "iso_1_code": null, "iso_3_code": "lke", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5878", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soga", "iso_1_code": null, "iso_3_code": "xog", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5879", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5877", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyoro-Ganda (E.17)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5877", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyoro-Ganda (E.17)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Gwere", "iso_1_code": null, "iso_3_code": "gwr", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5881", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5880", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ruanda-Rundi (D.61)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5880", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ruanda-Rundi (D.61)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kinyarwanda", "iso_1_code": "rw", "iso_3_code": "kin", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5883", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5882", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ruanda-Rundi (D.62)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5882", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ruanda-Rundi (D.62)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Rundi", "iso_1_code": "rn", "iso_3_code": "run", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5885", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5884", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ruanda-Rundi (D.63)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fuliiru", "iso_1_code": null, "iso_3_code": "flr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5887", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5886", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ruanda-Rundi (D.631)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kiviila", "iso_1_code": null, "iso_3_code": "job", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5889", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5888", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ruanda-Rundi (D.64)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Shubi", "iso_1_code": null, "iso_3_code": "suj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5891", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5890", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ruanda-Rundi (D.65)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hangaza", "iso_1_code": null, "iso_3_code": "han", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5893", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5892", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ruanda-Rundi (D.66)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ha", "iso_1_code": null, "iso_3_code": "haq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5895", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5894", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ruanda-Rundi (D.67)", "iso_1_code": null, "iso_3_code": null, + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5896", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shi-Hunde (D.501)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyindu", "iso_1_code": null, "iso_3_code": "nyg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5897", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shi-Hunde (D.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hunde", "iso_1_code": null, "iso_3_code": "hke", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shi-Hunde (D.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"lg\")", - "original_lang_name": "ganda", - "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Havu", "iso_1_code": null, "iso_3_code": "hav", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5902", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5901", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Shi-Hunde (D.53)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5901", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Shi-Hunde (D.53)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Shi", "iso_1_code": null, "iso_3_code": "shr", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"lg\")", "original_lang_name": "ganda", "original_lang_code": "lug", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5904", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5903", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shi-Hunde (D.531)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tembo", "iso_1_code": null, "iso_3_code": "tbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5905", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shi-Hunde (D.56)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kabwari", "iso_1_code": null, "iso_3_code": "kcw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5908", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"lg\")", + "original_lang_name": "ganda", + "original_lang_code": "lug", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "K", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ciokwe-Luchazi (K.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Chokwe", "iso_1_code": null, "iso_3_code": "cjk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5911", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5910", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ciokwe-Luchazi (K.12)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5910", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ciokwe-Luchazi (K.12)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Luimbi", "iso_1_code": null, "iso_3_code": "lum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyemba", "iso_1_code": null, "iso_3_code": "nba", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5914", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5912", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ciokwe-Luchazi (K.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Luchazi", "iso_1_code": null, "iso_3_code": "lch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ciokwe-Luchazi (K.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Luvale", "iso_1_code": null, "iso_3_code": "lue", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5918", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5917", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ciokwe-Luchazi (K.15)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5917", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ciokwe-Luchazi (K.15)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Mbunda", "iso_1_code": null, "iso_3_code": "mck", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5920", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ciokwe-Luchazi (K.16)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyengo", "iso_1_code": null, "iso_3_code": "nye", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yauma", "iso_1_code": null, "iso_3_code": "yax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ciokwe-Luchazi (K.17)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbwela", "iso_1_code": null, "iso_3_code": "mfu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5925", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5924", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ciokwe-Luchazi (K.18)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nkangala", "iso_1_code": null, "iso_3_code": "nkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5926", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lozi (K.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lozi", "iso_1_code": null, "iso_3_code": "loz", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5929", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5928", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luyana (K.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Luyana", "iso_1_code": null, "iso_3_code": "lyn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5930", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luyana (K.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbowe", "iso_1_code": null, "iso_3_code": "mxo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5933", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5932", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luyana (K.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kwangali", "iso_1_code": null, "iso_3_code": "kwn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5935", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5934", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Luyana (K.332)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5934", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Luyana (K.332)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Gciriku", "iso_1_code": null, "iso_3_code": "diu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5937", + "native_tokenizers": [], "scripts": [ - "Latn" - ], - "own_tokenizer": false - } - ], - "node_i": "5936", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Luyana (K.333)", - "iso_1_code": null, - "iso_3_code": null, + "Latn" + ] + } + ], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5936", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Luyana (K.333)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Mbukushu", "iso_1_code": null, "iso_3_code": "mhw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5939", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5938", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luyana (K.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mashi", "iso_1_code": null, "iso_3_code": "mho", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luyana (K.35)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Simaa", "iso_1_code": null, "iso_3_code": "sie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5943", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subiya-Totela (K.402)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fwe", "iso_1_code": null, "iso_3_code": "fwe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5945", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subiya-Totela (K.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Totela", "iso_1_code": null, "iso_3_code": "ttl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5947", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5946", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subiya-Totela (K.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kuhane", "iso_1_code": null, "iso_3_code": "sbs", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5949", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5909", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "L", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5909", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "L", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kaonde (L.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kaonde", "iso_1_code": null, "iso_3_code": "kqn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5952", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5951", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Luba (L.31)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5951", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Luba (L.31)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Luba-Kasai", "iso_1_code": null, "iso_3_code": "lua", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5954", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5953", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Luba (L.32)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5953", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Luba (L.32)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kanyok", "iso_1_code": null, "iso_3_code": "kny", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5956", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5955", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Luba (L.33)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5955", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Luba (L.33)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Luba-Katanga", "iso_1_code": "lu", "iso_3_code": "lub", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5958", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5957", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luba (L.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hemba", "iso_1_code": null, "iso_3_code": "hem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luba (L.35)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sanga", "iso_1_code": null, "iso_3_code": "sng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5962", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5961", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lunda (L.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Salampasu", "iso_1_code": null, "iso_3_code": "slx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5964", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5963", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lunda (L.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lunda", "iso_1_code": null, "iso_3_code": "lun", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5966", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5965", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Lunda (L.53)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5965", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Lunda (L.53)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ruund", "iso_1_code": null, "iso_3_code": "rnd", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5968", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5967", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nkoya (L.60)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nkoya", "iso_1_code": null, "iso_3_code": "nka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pende (L.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Phende", "iso_1_code": null, "iso_3_code": "pem", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5972", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5971", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pende (L.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Holu", "iso_1_code": null, "iso_3_code": "hol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samba", "iso_1_code": null, "iso_3_code": "smx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5975", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5973", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pende (L.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwese", "iso_1_code": null, "iso_3_code": "kws", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5977", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5976", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Songe (L.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kete", "iso_1_code": null, "iso_3_code": "kcv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5979", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5978", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Songe (L.221)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lwalu", "iso_1_code": null, "iso_3_code": "lwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5981", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5980", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Songe (L.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Songe", "iso_1_code": null, "iso_3_code": "sop", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5983", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5982", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Songe (L.231)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bindji", "iso_1_code": null, "iso_3_code": "bpj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5985", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Songe (L.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Luna", "iso_1_code": null, "iso_3_code": "luj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5987", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5986", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5950", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "M", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5950", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "M", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bemba (M.401)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bwile", "iso_1_code": null, "iso_3_code": "bwc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bemba (M.402)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aushi", "iso_1_code": null, "iso_3_code": "auh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5991", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bemba (M.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Taabwa", "iso_1_code": null, "iso_3_code": "tap", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5994", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "5993", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Bemba (M.42)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5993", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Bemba (M.42)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bemba", "iso_1_code": null, "iso_3_code": "bem", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "5996", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5995", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fipa-Mambwe (M.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pimbwe", "iso_1_code": null, "iso_3_code": "piw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "5998", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5997", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fipa-Mambwe (M.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rungwa", "iso_1_code": null, "iso_3_code": "rnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "5999", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fipa-Mambwe (M.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fipa", "iso_1_code": null, "iso_3_code": "fip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6002", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6001", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fipa-Mambwe (M.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mambwe-Lungu", "iso_1_code": null, "iso_3_code": "mgr", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6004", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6003", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lala-Bisa-Lamba (M.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lala-Bisa", "iso_1_code": null, "iso_3_code": "leb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6006", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6005", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lala-Bisa-Lamba (M.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lamba", "iso_1_code": null, "iso_3_code": "lam", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6008", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6007", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lala-Bisa-Lamba (M.55)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Seba", "iso_1_code": null, "iso_3_code": "kdg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lenje-Tonga (M.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lenje", "iso_1_code": null, "iso_3_code": "leh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6012", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6011", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Lenje-Tonga (M.62)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6011", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Lenje-Tonga (M.62)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Soli", "iso_1_code": null, "iso_3_code": "sby", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6014", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6013", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Lenje-Tonga (M.63)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6013", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Lenje-Tonga (M.63)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ila", "iso_1_code": null, "iso_3_code": "ilb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6016", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lenje-Tonga (M.631)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sala", "iso_1_code": null, "iso_3_code": "shq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lenje-Tonga (M.64)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dombe", "iso_1_code": null, "iso_3_code": "dov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tonga", "iso_1_code": null, "iso_3_code": "toi", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6021", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6019", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyakyusa-Ngonde (M.301)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6019", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyakyusa-Ngonde (M.301)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ndali", "iso_1_code": null, "iso_3_code": "ndh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6023", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6022", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyakyusa-Ngonde (M.31)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6022", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyakyusa-Ngonde (M.31)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Nyakyusa-Ngonde", "iso_1_code": null, "iso_3_code": "nyy", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6025", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6024", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyiha-Safwa (M.201)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6024", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyiha-Safwa (M.201)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Lambya", "iso_1_code": null, "iso_3_code": "lai", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6027", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6026", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyiha-Safwa (M.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wanda", "iso_1_code": null, "iso_3_code": "wbh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6029", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6028", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyiha-Safwa (M.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nyamwanga", "iso_1_code": null, "iso_3_code": "mwn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6031", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6030", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyiha-Safwa (M.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyiha, Tanzania", "iso_1_code": null, "iso_3_code": "nih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6033", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyika, Tanzania", "iso_1_code": null, "iso_3_code": "nkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6034", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyika", "iso_1_code": null, "iso_3_code": "nkv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyiha, Malawi", "iso_1_code": null, "iso_3_code": "nyr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyiha-Safwa (M.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Malila", "iso_1_code": null, "iso_3_code": "mgq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6037", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyiha-Safwa (M.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Safwa", "iso_1_code": null, "iso_3_code": "sbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6040", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5988", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "N", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5988", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "N", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Chewa-Nyanja (N.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Chichewa", "iso_1_code": "ny", "iso_3_code": "nya", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6043", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6042", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manda (N.101)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndendeule", "iso_1_code": null, "iso_3_code": "dne", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6045", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndwewe", "iso_1_code": null, "iso_3_code": "nww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6044", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manda (N.102)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nindi", "iso_1_code": null, "iso_3_code": "nxi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6048", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6047", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manda (N.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Manda", "iso_1_code": null, "iso_3_code": "mgs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6049", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manda (N.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chingoni", "iso_1_code": null, "iso_3_code": "xnj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6052", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xingoni", "iso_1_code": null, "iso_3_code": "xnq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6051", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manda (N.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Matengo", "iso_1_code": null, "iso_3_code": "mgv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manda (N.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mpoto", "iso_1_code": null, "iso_3_code": "mpa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6057", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6056", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manda (N.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tonga", "iso_1_code": null, "iso_3_code": "tog", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6059", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6058", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Senga-Sena (N.41)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6058", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Senga-Sena (N.41)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Nsenga", "iso_1_code": null, "iso_3_code": "nse", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6061", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Phimbi", "iso_1_code": null, "iso_3_code": "phm", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6062", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Senga-Sena (N.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kunda", "iso_1_code": null, "iso_3_code": "kdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6064", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6063", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Senga-Sena (N.43)", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, + "tokenizers": {}, + "node_i": "6063", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Senga-Sena (N.43)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Nyungwe", "iso_1_code": null, "iso_3_code": "nyu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6066", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6065", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Senga-Sena (N.44)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6065", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Senga-Sena (N.44)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Barwe", "iso_1_code": null, "iso_3_code": "bwg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6068", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sena", "iso_1_code": null, "iso_3_code": "seh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6069", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6067", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Senga-Sena (N.441)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6067", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Senga-Sena (N.441)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Sena, Malawi", "iso_1_code": null, "iso_3_code": "swk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6071", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6070", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tumbuka (N.201)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mwera", "iso_1_code": null, "iso_3_code": "mjh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6073", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6072", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tumbuka (N.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tumbuka", "iso_1_code": null, "iso_3_code": "tum", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6075", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6074", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6041", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "P", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6041", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "P", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Makhuwa (P.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kokola", "iso_1_code": null, "iso_3_code": "kzn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6078", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lolo", "iso_1_code": null, "iso_3_code": "llb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6079", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Makhuwa-Meetto", "iso_1_code": null, "iso_3_code": "mgh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6080", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manyawa", "iso_1_code": null, "iso_3_code": "mny", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6081", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Takwane", "iso_1_code": null, "iso_3_code": "tke", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6082", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Makhuwa-Shirima", "iso_1_code": null, "iso_3_code": "vmk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6083", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Marenje", "iso_1_code": null, "iso_3_code": "vmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makhuwa", "iso_1_code": null, "iso_3_code": "vmw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6085", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Makhuwa-Marrevone", "iso_1_code": null, "iso_3_code": "xmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6086", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makhuwa-Saka", "iso_1_code": null, "iso_3_code": "xsq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6087", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6077", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Makhuwa (P.311)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6077", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Makhuwa (P.311)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Koti", "iso_1_code": null, "iso_3_code": "eko", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6089", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6088", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makhuwa (P.312)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nathembo", "iso_1_code": null, "iso_3_code": "nte", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6091", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makhuwa (P.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lomwe", "iso_1_code": null, "iso_3_code": "ngl", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6093", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6092", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makhuwa (P.331)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lomwe, Malawi", "iso_1_code": null, "iso_3_code": "lon", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6095", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makhuwa (P.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Chuwabu", "iso_1_code": null, "iso_3_code": "chw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6097", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Maindo", "iso_1_code": null, "iso_3_code": "cwb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6098", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makhuwa (P.341)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Makhuwa-Moniga", "iso_1_code": null, "iso_3_code": "mhm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6099", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matuumbi (P.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndengereko", "iso_1_code": null, "iso_3_code": "ndg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6102", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matuumbi (P.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rufiji", "iso_1_code": null, "iso_3_code": "rui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6103", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matuumbi (P.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Matumbi", "iso_1_code": null, "iso_3_code": "mgw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matuumbi (P.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ngindo", "iso_1_code": null, "iso_3_code": "nnq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6108", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matuumbi (P.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbunga", "iso_1_code": null, "iso_3_code": "mgy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6109", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yao (P.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Yao", "iso_1_code": null, "iso_3_code": "yao", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6112", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6111", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yao (P.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mwera", "iso_1_code": null, "iso_3_code": "mwe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6113", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yao (P.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Makonde", "iso_1_code": null, "iso_3_code": "kde", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6116", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Machinga", "iso_1_code": null, "iso_3_code": "mvw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matambwe", "iso_1_code": null, "iso_3_code": "wtb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6115", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yao (P.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndonde Hamba", "iso_1_code": null, "iso_3_code": "njd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6076", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "R", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6076", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "R", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Herero (R.30)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Herero", "iso_1_code": "hz", "iso_3_code": "her", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6123", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6122", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Herero (R.311)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6122", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Herero (R.311)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dhimba", "iso_1_code": null, "iso_3_code": "dhm", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6125", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umbundu (R.101)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngendelengo", "iso_1_code": null, "iso_3_code": "nql", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuvale", "iso_1_code": null, "iso_3_code": "olu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umbundu (R.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Umbundu", "iso_1_code": null, "iso_3_code": "umb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6130", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umbundu (R.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndombe", "iso_1_code": null, "iso_3_code": "ndq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umbundu (R.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nyaneka", "iso_1_code": null, "iso_3_code": "nyk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6134", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kwandu", "iso_1_code": null, "iso_3_code": "xdo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6133", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umbundu (R.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nkumbi", "iso_1_code": null, "iso_3_code": "khu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6136", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wambo (R.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Oshiwambo", "iso_1_code": "kj", "iso_3_code": "kua", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6139", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wambo (R.214)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbalanhu", "iso_1_code": null, "iso_3_code": "lnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6141", - "scripts": [], - "own_tokenizer": false - } - ], - "node_i": "6140", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Wambo (R.22)", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "native_tokenizers": [], + "scripts": [] } - }, + ], + "tokenizers": {}, + "node_i": "6140", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Wambo (R.22)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ndonga", "iso_1_code": "ng", "iso_3_code": "ndo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6143", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wambo (R.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwambi", "iso_1_code": null, "iso_3_code": "kwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6145", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6144", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wambo (R.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngandjera", "iso_1_code": null, "iso_3_code": "nne", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6147", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yeyi (R.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yeyi", "iso_1_code": null, "iso_3_code": "yey", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6149", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6121", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "S", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6121", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "S", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Copi (S.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Chopi", "iso_1_code": null, "iso_3_code": "cce", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6152", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6151", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Copi (S.62)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6151", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Copi (S.62)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Tonga", "iso_1_code": null, "iso_3_code": "toh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6154", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6153", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nguni (S.407)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6153", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nguni (S.407)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ndebele", "iso_1_code": "nr", "iso_3_code": "nbl", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6156", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6155", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nguni (S.41)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6155", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nguni (S.41)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Xhosa", "iso_1_code": "xh", "iso_3_code": "xho", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6158", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6157", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nguni (S.42)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6157", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nguni (S.42)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Zulu", "iso_1_code": "zu", "iso_3_code": "zul", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6160", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6159", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nguni (S.43)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6159", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nguni (S.43)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Swati", "iso_1_code": "ss", "iso_3_code": "ssw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6162", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6161", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nguni (S.44)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6161", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nguni (S.44)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ndebele", "iso_1_code": "nd", "iso_3_code": "nde", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6164", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6163", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Shona (S.10)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6163", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Shona (S.10)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dema", "iso_1_code": null, "iso_3_code": "dmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6166", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shona", "iso_1_code": "sn", "iso_3_code": "sna", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6167", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shona (S.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tawara", "iso_1_code": null, "iso_3_code": "twl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shona (S.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manyika", "iso_1_code": null, "iso_3_code": "mxc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tewe", "iso_1_code": null, "iso_3_code": "twx", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6172", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6170", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Shona (S.15)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6170", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Shona (S.15)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ndau", "iso_1_code": null, "iso_3_code": "ndc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6174", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6173", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Shona (S.16)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6173", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Shona (S.16)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kalanga", "iso_1_code": null, "iso_3_code": "kck", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6176", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nambya", "iso_1_code": null, "iso_3_code": "nmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6175", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Sotho-Tswana (S.31)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6175", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Sotho-Tswana (S.31)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Setswana", "iso_1_code": "tn", "iso_3_code": "tsn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6179", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sotho-Tswana (S.311)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kgalagadi", "iso_1_code": null, "iso_3_code": "xkv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6180", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sotho-Tswana (S.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Birwa", "iso_1_code": null, "iso_3_code": "brl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sotho, Northern", "iso_1_code": null, "iso_3_code": "nso", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6184", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tswapong", "iso_1_code": null, "iso_3_code": "two", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6182", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Sotho-Tswana (S.33)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6182", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Sotho-Tswana (S.33)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Sotho, Southern", "iso_1_code": "st", "iso_3_code": "sot", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6187", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6186", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Tswa-Rhonga (S.51)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6186", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Tswa-Rhonga (S.51)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Tswa", "iso_1_code": null, "iso_3_code": "tsc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6189", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6188", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Tswa-Rhonga (S.53)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6188", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Tswa-Rhonga (S.53)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Tsonga", "iso_1_code": "ts", "iso_3_code": "tso", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6191", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6190", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Tswa-Rhonga (S.54)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6190", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Tswa-Rhonga (S.54)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ronga", "iso_1_code": null, "iso_3_code": "rng", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6193", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6192", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Venda (S.21)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6192", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Venda (S.21)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Venda", "iso_1_code": "ve", "iso_3_code": "ven", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6195", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6150", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5532", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Northwest", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5532", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Northwest", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bafia (A.501)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hijuk", "iso_1_code": null, "iso_3_code": "hij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bafia (A.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lefa", "iso_1_code": null, "iso_3_code": "lfa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bafia (A.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dimbong", "iso_1_code": null, "iso_3_code": "dii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6203", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6202", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Bafia (A.53)", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, + "tokenizers": {}, + "node_i": "6202", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Bafia (A.53)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bafia", "iso_1_code": null, "iso_3_code": "ksf", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6205", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bafia (A.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tibea", "iso_1_code": null, "iso_3_code": "ngy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basaa (A.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barombi", "iso_1_code": null, "iso_3_code": "bbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basaa (A.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bankon", "iso_1_code": null, "iso_3_code": "abb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basaa (A.43)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Basaa", "iso_1_code": null, "iso_3_code": "bas", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6213", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bakoko", "iso_1_code": null, "iso_3_code": "bkh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basaa (A.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tunen", "iso_1_code": null, "iso_3_code": "tvu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6216", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basaa (A.45)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyokon", "iso_1_code": null, "iso_3_code": "nvo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6217", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basaa (A.46)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nomaande", "iso_1_code": null, "iso_3_code": "lem", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6220", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6219", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basaa (A.461)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tuotomb", "iso_1_code": null, "iso_3_code": "ttf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6221", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basaa (A.462)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Yambeta", "iso_1_code": null, "iso_3_code": "yat", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6224", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bubi-Benga (A.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bubia", "iso_1_code": null, "iso_3_code": "bbx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bube", "iso_1_code": null, "iso_3_code": "bvb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bubi-Benga (A.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Batanga", "iso_1_code": null, "iso_3_code": "bnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6229", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bubi-Benga (A.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kombe", "iso_1_code": null, "iso_3_code": "nui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iyasa", "iso_1_code": null, "iso_3_code": "yko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6232", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bubi-Benga (A.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Benga", "iso_1_code": null, "iso_3_code": "bng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6234", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6233", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duala (A.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wumboko", "iso_1_code": null, "iso_3_code": "bqm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duala (A.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mokpwe", "iso_1_code": null, "iso_3_code": "bri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6238", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duala (A.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Isubu", "iso_1_code": null, "iso_3_code": "szv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duala (A.231)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bakole", "iso_1_code": null, "iso_3_code": "kme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6242", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6241", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duala (A.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Duala", "iso_1_code": null, "iso_3_code": "dua", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6244", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duala (A.27)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mulimba", "iso_1_code": null, "iso_3_code": "mzd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ewondo-Fang (A.71)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eton", "iso_1_code": null, "iso_3_code": "eto", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6248", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mengisa", "iso_1_code": null, "iso_3_code": "mct", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6247", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ewondo-Fang (A.72)", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6247", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ewondo-Fang (A.72)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ewondo", "iso_1_code": null, "iso_3_code": "ewo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6251", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ewondo-Fang (A.73)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bebele", "iso_1_code": null, "iso_3_code": "beb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bebil", "iso_1_code": null, "iso_3_code": "bxp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6252", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ewondo-Fang (A.74)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bulu", "iso_1_code": null, "iso_3_code": "bum", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], - "node_i": "6256", - "scripts": [ - "Latn" - ], - "own_tokenizer": false - } - ], - "node_i": "6255", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ewondo-Fang (A.75)", - "iso_1_code": null, - "iso_3_code": null, + "node_i": "6256", + "native_tokenizers": [], + "scripts": [ + "Latn" + ] + } + ], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6255", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ewondo-Fang (A.75)", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Fang", "iso_1_code": null, "iso_3_code": "fan", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6258", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaka (A.91)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwakum", "iso_1_code": null, "iso_3_code": "kwu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6259", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaka (A.92)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pol", "iso_1_code": null, "iso_3_code": "pmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaka (A.93)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kako", "iso_1_code": null, "iso_3_code": "kkj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6264", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6263", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lundu-Balong (A.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oroko", "iso_1_code": null, "iso_3_code": "bdu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6265", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lundu-Balong (A.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bafaw-Balong", "iso_1_code": null, "iso_3_code": "bwt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lundu-Balong (A.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bonkeng", "iso_1_code": null, "iso_3_code": "bvg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6269", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lundu-Balong (A.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbo", "iso_1_code": null, "iso_3_code": "mbo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6271", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lundu-Balong (A.151)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nkongho", "iso_1_code": null, "iso_3_code": "nkc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6273", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lundu-Balong (A.15B)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bassossi", "iso_1_code": null, "iso_3_code": "bsi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6275", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lundu-Balong (A.15C)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bakaka", "iso_1_code": null, "iso_3_code": "bqz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Akoose", "iso_1_code": null, "iso_3_code": "bss", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6279", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6277", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.801)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gyele", "iso_1_code": null, "iso_3_code": "gyi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.802)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ukhwejo", "iso_1_code": null, "iso_3_code": "ukh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6283", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.81)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwasio", "iso_1_code": null, "iso_3_code": "nmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6285", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6284", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.82)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Swo", "iso_1_code": null, "iso_3_code": "sox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6287", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.83)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Makaa", "iso_1_code": null, "iso_3_code": "mcp", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6289", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6288", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.831)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Byep", "iso_1_code": null, "iso_3_code": "mkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6291", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6290", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.832)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kol", "iso_1_code": null, "iso_3_code": "biw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.84)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Njyem", "iso_1_code": null, "iso_3_code": "njy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6294", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.842)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Koonzime", "iso_1_code": null, "iso_3_code": "ozm", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6297", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.85)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bekwel", "iso_1_code": null, "iso_3_code": "bkw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6299", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6298", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.86)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mpiemo", "iso_1_code": null, "iso_3_code": "mcx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6301", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mpumpong", "iso_1_code": null, "iso_3_code": "mgg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makaa-Njem (A.87)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bomwali", "iso_1_code": null, "iso_3_code": "bmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6303", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanaga (A.601)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tuki", "iso_1_code": null, "iso_3_code": "bag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Leti", "iso_1_code": null, "iso_3_code": "leo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6307", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanaga (A.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nulibie", "iso_1_code": null, "iso_3_code": "ekm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Numala", "iso_1_code": null, "iso_3_code": "mmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6310", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yangben", "iso_1_code": null, "iso_3_code": "yav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6311", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6308", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanaga (A.621)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nubaca", "iso_1_code": null, "iso_3_code": "baf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanaga (A.622)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nugunu", "iso_1_code": null, "iso_3_code": "yas", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6315", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6314", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanaga (A.623)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbule", "iso_1_code": null, "iso_3_code": "mlb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6317", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanaga (A.65)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bati", "iso_1_code": null, "iso_3_code": "btc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "B", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kele (B.201)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndasa", "iso_1_code": null, "iso_3_code": "nda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.202)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sighu", "iso_1_code": null, "iso_3_code": "sxe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.203)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Samay", "iso_1_code": null, "iso_3_code": "syx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.204)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndambomo", "iso_1_code": null, "iso_3_code": "nxo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Seki", "iso_1_code": null, "iso_3_code": "syi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6330", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.211)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Molengue", "iso_1_code": null, "iso_3_code": "bxc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "K\u00e9l\u00e9", "iso_1_code": null, "iso_3_code": "keb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngom", "iso_1_code": null, "iso_3_code": "nra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.23)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbangwe", "iso_1_code": null, "iso_3_code": "zmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6336", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wumbvu", "iso_1_code": null, "iso_3_code": "wum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kota", "iso_1_code": null, "iso_3_code": "koq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.251)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sake", "iso_1_code": null, "iso_3_code": "sak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kele (B.252)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mahongwe", "iso_1_code": null, "iso_3_code": "mhb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6345", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6344", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbete (B.602)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaningi", "iso_1_code": null, "iso_3_code": "kzo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbete (B.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbere", "iso_1_code": null, "iso_3_code": "mdt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbete (B.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ombamba", "iso_1_code": null, "iso_3_code": "mbm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbete (B.63)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndumu", "iso_1_code": null, "iso_3_code": "nmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Myene (B.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Myene", "iso_1_code": null, "iso_3_code": "mye", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nzebi (B.501)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wandji", "iso_1_code": null, "iso_3_code": "wdd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nzebi (B.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Duma", "iso_1_code": null, "iso_3_code": "dma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6359", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6358", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nzebi (B.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Njebi", "iso_1_code": null, "iso_3_code": "nzb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nzebi (B.53)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tsaangi", "iso_1_code": null, "iso_3_code": "tsa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6363", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shira-Punu (B.401)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bwisi", "iso_1_code": null, "iso_3_code": "bwz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shira-Punu (B.402)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barama", "iso_1_code": null, "iso_3_code": "bbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6367", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shira-Punu (B.403)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Vumbu", "iso_1_code": null, "iso_3_code": "vum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6369", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shira-Punu (B.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sira", "iso_1_code": null, "iso_3_code": "swj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6370", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shira-Punu (B.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sangu", "iso_1_code": null, "iso_3_code": "snq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6373", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6372", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shira-Punu (B.43)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Punu", "iso_1_code": null, "iso_3_code": "puu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6375", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6374", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shira-Punu (B.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lumbu", "iso_1_code": null, "iso_3_code": "lup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6377", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6376", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke (B.701)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tchitchege", "iso_1_code": null, "iso_3_code": "tck", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6379", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6378", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke (B.71)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Teke-Tege", "iso_1_code": null, "iso_3_code": "teg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke (B.72)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngungwel", "iso_1_code": null, "iso_3_code": "ngz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6383", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke (B.73)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yaka", "iso_1_code": null, "iso_3_code": "iyx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6385", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke-Laali", "iso_1_code": null, "iso_3_code": "lli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke-Tsaayi", "iso_1_code": null, "iso_3_code": "tyi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6387", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke-Tyee", "iso_1_code": null, "iso_3_code": "tyx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6384", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke (B.74)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Teke-Eboo", "iso_1_code": null, "iso_3_code": "ebo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6390", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke-Nzikou", "iso_1_code": null, "iso_3_code": "nzu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6389", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke (B.75)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Teke, Ibali", "iso_1_code": null, "iso_3_code": "tek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6393", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke (B.77)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Teke-Wuumu", "iso_1_code": null, "iso_3_code": "ifm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6395", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teke-Kukuya", "iso_1_code": null, "iso_3_code": "kkw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6396", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6394", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiene-Yanzi (B.81)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tiene", "iso_1_code": null, "iso_3_code": "tii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6397", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiene-Yanzi (B.82)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boma", "iso_1_code": null, "iso_3_code": "boh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6400", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiene-Yanzi (B.83)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mfinu", "iso_1_code": null, "iso_3_code": "zmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6401", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiene-Yanzi (B.84)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbuun", "iso_1_code": null, "iso_3_code": "zmp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6404", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiene-Yanzi (B.85)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Songo", "iso_1_code": null, "iso_3_code": "soo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iyansi", "iso_1_code": null, "iso_3_code": "yns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiene-Yanzi (B.86)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ding", "iso_1_code": null, "iso_3_code": "diz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6409", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6408", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiene-Yanzi (B.861)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngwii", "iso_1_code": null, "iso_3_code": "nlo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6411", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiene-Yanzi (B.862)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lwel", "iso_1_code": null, "iso_3_code": "lvl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6412", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiene-Yanzi (B.864)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngongo", "iso_1_code": null, "iso_3_code": "noq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6415", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6414", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiene-Yanzi (B.865)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nzadi", "iso_1_code": null, "iso_3_code": "nzd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6416", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsogo (B.301)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eviya", "iso_1_code": null, "iso_3_code": "gev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsogo (B.302)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Simba", "iso_1_code": null, "iso_3_code": "sbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6421", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsogo (B.304)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pinji", "iso_1_code": null, "iso_3_code": "pic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6422", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsogo (B.305)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bubi", "iso_1_code": null, "iso_3_code": "buw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsogo (B.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tsogo", "iso_1_code": null, "iso_3_code": "tsv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsogo (B.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kande", "iso_1_code": null, "iso_3_code": "kbs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "C", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bangi-Ntomba (C.30)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bangala", "iso_1_code": null, "iso_3_code": "bxg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lingala", "iso_1_code": "ln", "iso_3_code": "lin", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6433", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6431", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.302)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bolondo", "iso_1_code": null, "iso_3_code": "bzm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.31)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baloi", "iso_1_code": null, "iso_3_code": "biz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Likila", "iso_1_code": null, "iso_3_code": "lie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6438", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.311)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mabaale", "iso_1_code": null, "iso_3_code": "mmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6439", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.312)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndobo", "iso_1_code": null, "iso_3_code": "ndw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.32)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bangi", "iso_1_code": null, "iso_3_code": "bni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moi", "iso_1_code": null, "iso_3_code": "mow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6445", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.321)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Libinza", "iso_1_code": null, "iso_3_code": "liz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6446", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.33)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sengele", "iso_1_code": null, "iso_3_code": "szg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6448", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.34)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sakata", "iso_1_code": null, "iso_3_code": "skt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6450", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.35)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bolia", "iso_1_code": null, "iso_3_code": "bli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6453", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ntomba", "iso_1_code": null, "iso_3_code": "nto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.36)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boloki", "iso_1_code": null, "iso_3_code": "bkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lusengo", "iso_1_code": null, "iso_3_code": "lse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6457", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndolo", "iso_1_code": null, "iso_3_code": "ndl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yamongeri", "iso_1_code": null, "iso_3_code": "ymg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.37)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Budja", "iso_1_code": null, "iso_3_code": "bja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6461", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangi-Ntomba (C.371)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tembo", "iso_1_code": null, "iso_3_code": "tmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bushoong (C.81)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dengese", "iso_1_code": null, "iso_3_code": "dez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bushoong (C.82)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ohendo", "iso_1_code": null, "iso_3_code": "soe", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6467", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bushoong (C.83)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bushoong", "iso_1_code": null, "iso_3_code": "buf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6469", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bushoong (C.84)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lele", "iso_1_code": null, "iso_3_code": "lel", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6471", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bushoong (C.85)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wongo", "iso_1_code": null, "iso_3_code": "won", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6472", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mboshi (C.21)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mboko", "iso_1_code": null, "iso_3_code": "mdu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6474", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mboshi (C.22)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akwa", "iso_1_code": null, "iso_3_code": "akw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6477", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mboshi (C.24)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Koyo", "iso_1_code": null, "iso_3_code": "koh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6478", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mboshi (C.25)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbosi", "iso_1_code": null, "iso_3_code": "mdw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mboshi (C.26)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Likwala", "iso_1_code": null, "iso_3_code": "kwc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6483", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mboshi (C.27)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Likuba", "iso_1_code": null, "iso_3_code": "kxx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mongo-Nkundo (C.61)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mongo-Nkundu", "iso_1_code": null, "iso_3_code": "lol", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6487", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6486", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mongo-Nkundo (C.62)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lalia", "iso_1_code": null, "iso_3_code": "lal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mongo-Nkundo (C.63)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ngando", "iso_1_code": null, "iso_3_code": "nxd", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6491", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6490", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.401)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pagibete", "iso_1_code": null, "iso_3_code": "pae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.403)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kango", "iso_1_code": null, "iso_3_code": "kty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6495", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6494", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.41)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ngombe", "iso_1_code": null, "iso_3_code": "ngc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6497", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6496", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.411)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bomboma", "iso_1_code": null, "iso_3_code": "bws", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6499", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6498", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.412)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bamwe", "iso_1_code": null, "iso_3_code": "bmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6501", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6500", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.413)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dzando", "iso_1_code": null, "iso_3_code": "dzn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.414)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ligenza", "iso_1_code": null, "iso_3_code": "lgz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6504", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.42)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bwela", "iso_1_code": null, "iso_3_code": "bwl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.44)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bwa", "iso_1_code": null, "iso_3_code": "bww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.441)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Babango", "iso_1_code": null, "iso_3_code": "bbm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombe (C.45)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngelima", "iso_1_code": null, "iso_3_code": "agh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.101)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dibole", "iso_1_code": null, "iso_3_code": "bvx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.102)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngando", "iso_1_code": null, "iso_3_code": "ngd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.104)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yaka", "iso_1_code": null, "iso_3_code": "axk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6519", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6518", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.11)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngundi", "iso_1_code": null, "iso_3_code": "ndn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.12)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pande", "iso_1_code": null, "iso_3_code": "bkj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6523", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.13)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbati", "iso_1_code": null, "iso_3_code": "mdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.14)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bomitaba", "iso_1_code": null, "iso_3_code": "zmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6526", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.143)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bonjo", "iso_1_code": null, "iso_3_code": "bok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6529", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.15)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bongili", "iso_1_code": null, "iso_3_code": "bui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.16)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Boko", "iso_1_code": null, "iso_3_code": "bkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lobala", "iso_1_code": null, "iso_3_code": "loq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6534", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.161)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bomboli", "iso_1_code": null, "iso_3_code": "bml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6536", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6535", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngondi (C.162)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bozaba", "iso_1_code": null, "iso_3_code": "bzo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6538", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6537", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soko-Kele (C.51)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbesa", "iso_1_code": null, "iso_3_code": "zms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soko-Kele (C.52)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "So", "iso_1_code": null, "iso_3_code": "soc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6542", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soko-Kele (C.53)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Poke", "iso_1_code": null, "iso_3_code": "pof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6544", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soko-Kele (C.54)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lombo", "iso_1_code": null, "iso_3_code": "loo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6546", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soko-Kele (C.55)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kele", "iso_1_code": null, "iso_3_code": "khy", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6548", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soko-Kele (C.56)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Foma", "iso_1_code": null, "iso_3_code": "fom", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6550", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetela (C.71)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Hamba", "iso_1_code": null, "iso_3_code": "hba", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetela", "iso_1_code": null, "iso_3_code": "tll", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6553", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6551", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetela (C.72)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kusu", "iso_1_code": null, "iso_3_code": "ksv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetela (C.73)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nkutu", "iso_1_code": null, "iso_3_code": "nkw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6556", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetela (C.74)", "iso_1_code": null, "iso_3_code": null, + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6558", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetela (C.75)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kela", "iso_1_code": null, "iso_3_code": "kel", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6560", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6559", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetela (C.76)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ombo", "iso_1_code": null, "iso_3_code": "oml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6430", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6196", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndemli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndemli", "iso_1_code": null, "iso_3_code": "nml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6564", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tikar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tikar", "iso_1_code": null, "iso_3_code": "tik", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6566", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6565", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Tivoid", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6565", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Tivoid", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Abon", "iso_1_code": null, "iso_3_code": "abo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6568", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Esimbi", "iso_1_code": null, "iso_3_code": "ags", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6569", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambo", "iso_1_code": null, "iso_3_code": "amb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6570", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ipulo", "iso_1_code": null, "iso_3_code": "ass", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6571", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iceve-Maci", "iso_1_code": null, "iso_3_code": "bec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6572", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Balo", "iso_1_code": null, "iso_3_code": "bqo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6573", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bitare", "iso_1_code": null, "iso_3_code": "brt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6574", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Batu", "iso_1_code": null, "iso_3_code": "btu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Evant", "iso_1_code": null, "iso_3_code": "bzz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6576", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Caka", "iso_1_code": null, "iso_3_code": "ckx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6577", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eman", "iso_1_code": null, "iso_3_code": "emn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6578", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mesaka", "iso_1_code": null, "iso_3_code": "iyo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manta", "iso_1_code": null, "iso_3_code": "myg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6580", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Osatu", "iso_1_code": null, "iso_3_code": "ost", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiv", "iso_1_code": null, "iso_3_code": "tiv", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6582", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Iyive", "iso_1_code": null, "iso_3_code": "uiv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6583", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itang", "iso_1_code": null, "iso_3_code": "uta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6584", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6567", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Busuu", "iso_1_code": null, "iso_3_code": "bju", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6586", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bishuo", "iso_1_code": null, "iso_3_code": "bwh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6587", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikya", "iso_1_code": null, "iso_3_code": "byb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moingi", "iso_1_code": null, "iso_3_code": "mwz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6589", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6585", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wide Grassfields", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Menchum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Befang", "iso_1_code": null, "iso_3_code": "bby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Narrow Grassfields", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Fum", "iso_1_code": null, "iso_3_code": "fum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbam-Nkam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bamileke", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ghom\u00e1l\u00e1\u2019", "iso_1_code": null, "iso_3_code": "bbj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6597", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kwa\u2019", "iso_1_code": null, "iso_3_code": "bko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fe\u2019fe\u2019", "iso_1_code": null, "iso_3_code": "fmp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngomba", "iso_1_code": null, "iso_3_code": "jgo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngombale", "iso_1_code": null, "iso_3_code": "nla", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6601", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngiemboon", "iso_1_code": null, "iso_3_code": "nnh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6602", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nda\u2019nda\u2019", "iso_1_code": null, "iso_3_code": "nnz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngwe", "iso_1_code": null, "iso_3_code": "nwe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mengaka", "iso_1_code": null, "iso_3_code": "xmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6605", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yemba", "iso_1_code": null, "iso_3_code": "ybb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6606", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6596", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ngemba", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6596", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ngemba", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Awing", "iso_1_code": null, "iso_3_code": "azo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6608", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bambili-Bambui", "iso_1_code": null, "iso_3_code": "baw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bafut", "iso_1_code": null, "iso_3_code": "bfd", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6610", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Beba", "iso_1_code": null, "iso_3_code": "bfp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6611", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mankong", "iso_1_code": null, "iso_3_code": "bqt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kpati", "iso_1_code": null, "iso_3_code": "koc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6613", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mendankwe-Nkwen", "iso_1_code": null, "iso_3_code": "mfd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6614", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngemba", "iso_1_code": null, "iso_3_code": "nge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pinyin", "iso_1_code": null, "iso_3_code": "pny", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6616", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6607", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nkambe", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6607", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nkambe", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Lidzonka", "iso_1_code": null, "iso_3_code": "add", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwaja", "iso_1_code": null, "iso_3_code": "kdz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Limbum", "iso_1_code": null, "iso_3_code": "lmp", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6620", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mbo\u2019", "iso_1_code": null, "iso_3_code": "mtk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mfumte", "iso_1_code": null, "iso_3_code": "nfu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6622", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yamba", "iso_1_code": null, "iso_3_code": "yam", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6623", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6617", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nun", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6617", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nun", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bamun", "iso_1_code": null, "iso_3_code": "bax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chopechop", "iso_1_code": null, "iso_3_code": "bbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Supapya", "iso_1_code": null, "iso_3_code": "bbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6627", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mengambo", "iso_1_code": null, "iso_3_code": "bce", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chufie\u2019", "iso_1_code": null, "iso_3_code": "bfj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6629", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mendenkye", "iso_1_code": null, "iso_3_code": "bgj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chrambo", "iso_1_code": null, "iso_3_code": "bmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Medumba", "iso_1_code": null, "iso_3_code": "byv", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6632", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mungaka", "iso_1_code": null, "iso_3_code": "mhk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6633", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6624", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6595", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Momo", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6595", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Momo", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Menka", "iso_1_code": null, "iso_3_code": "mea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Meta\u2019", "iso_1_code": null, "iso_3_code": "mgo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6636", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mundani", "iso_1_code": null, "iso_3_code": "mnf", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6637", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngamambo", "iso_1_code": null, "iso_3_code": "nbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6638", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngie", "iso_1_code": null, "iso_3_code": "ngj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngwo", "iso_1_code": null, "iso_3_code": "ngn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Njen", "iso_1_code": null, "iso_3_code": "njj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngoshie", "iso_1_code": null, "iso_3_code": "nsh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6642", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6634", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ring", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [ - { - "name": "Center", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, + "node_i": "6634", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ring", + "iso_1_code": null, + "iso_3_code": null, + "children": [ + { + "name": "Center", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Babanki", "iso_1_code": null, "iso_3_code": "bbk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6645", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mmen", "iso_1_code": null, "iso_3_code": "bfm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6646", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kom", "iso_1_code": null, "iso_3_code": "bkm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bum", "iso_1_code": null, "iso_3_code": "bmv", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6648", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mbessa", "iso_1_code": null, "iso_3_code": "emz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kung", "iso_1_code": null, "iso_3_code": "kfl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6650", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuk", "iso_1_code": null, "iso_3_code": "kfn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oku", "iso_1_code": null, "iso_3_code": "oku", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6652", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lamnso\u02bc", "iso_1_code": null, "iso_3_code": "lns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Vengo", "iso_1_code": null, "iso_3_code": "bav", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6656", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wushi", "iso_1_code": null, "iso_3_code": "bse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngiemekohke", "iso_1_code": null, "iso_3_code": "bvm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kenswei Nsei", "iso_1_code": null, "iso_3_code": "ndb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aghem", "iso_1_code": null, "iso_3_code": "agq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Isu", "iso_1_code": null, "iso_3_code": "isu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laimbue", "iso_1_code": null, "iso_3_code": "lmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Weh", "iso_1_code": null, "iso_3_code": "weh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhoa", "iso_1_code": null, "iso_3_code": "zhw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nde-Gbite", "iso_1_code": null, "iso_3_code": "ned", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Viti", "iso_1_code": null, "iso_3_code": "vit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6593", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Momo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ambele", "iso_1_code": null, "iso_3_code": "ael", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Atong", "iso_1_code": null, "iso_3_code": "ato", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Busam", "iso_1_code": null, "iso_3_code": "bxs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yemne-Kimbi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mundabli", "iso_1_code": null, "iso_3_code": "boe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fang", "iso_1_code": null, "iso_3_code": "fak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koshin", "iso_1_code": null, "iso_3_code": "kid", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mungbam", "iso_1_code": null, "iso_3_code": "mij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ajumbu", "iso_1_code": null, "iso_3_code": "muc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "\u00c1nc\u00e1", "iso_1_code": null, "iso_3_code": "acb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buru", "iso_1_code": null, "iso_3_code": "bqw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5452", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Cross River", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5452", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Cross River", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bendi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Utugwang-Irungene-Afrike", "iso_1_code": null, "iso_3_code": "afe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6684", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Elege", "iso_1_code": null, "iso_3_code": "alf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bekwarra", "iso_1_code": null, "iso_3_code": "bkv", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6686", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bokyi", "iso_1_code": null, "iso_3_code": "bky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6687", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bete-Bendi", "iso_1_code": null, "iso_3_code": "btt", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6688", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bumaji", "iso_1_code": null, "iso_3_code": "byp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Abanglekuo", "iso_1_code": null, "iso_3_code": "bzy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6690", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ubang", "iso_1_code": null, "iso_3_code": "uba", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6691", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bukpe", "iso_1_code": null, "iso_3_code": "ukp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6683", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Delta Cross", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6683", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Delta Cross", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Central Delta", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Abureni", "iso_1_code": null, "iso_3_code": "mgj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Obulom", "iso_1_code": null, "iso_3_code": "obu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ogbia", "iso_1_code": null, "iso_3_code": "ogb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ogbogolo", "iso_1_code": null, "iso_3_code": "ogg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ogbronuagum", "iso_1_code": null, "iso_3_code": "ogu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "O\u2019chi\u2019chi\u2019", "iso_1_code": null, "iso_3_code": "xoc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Abua-Odual", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Abua", "iso_1_code": null, "iso_3_code": "abn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6702", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Odual", "iso_1_code": null, "iso_3_code": "odu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" } - ], + }, "node_i": "6701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kugbo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kugbo", "iso_1_code": null, "iso_3_code": "kes", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6694", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Lower Cross", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6694", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Lower Cross", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Obolo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Obolo", "iso_1_code": null, "iso_3_code": "ann", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6708", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Eki", "iso_1_code": null, "iso_3_code": "eki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Idere", "iso_1_code": null, "iso_3_code": "ide", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6710", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ebughu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ebughu", "iso_1_code": null, "iso_3_code": "ebg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6712", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6711", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Efai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Efai", "iso_1_code": null, "iso_3_code": "efa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6714", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Efik", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Anaang", "iso_1_code": null, "iso_3_code": "anw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Efik", "iso_1_code": null, "iso_3_code": "efi", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6717", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ibibio", "iso_1_code": null, "iso_3_code": "ibb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6718", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ukwa", "iso_1_code": null, "iso_3_code": "ukq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6719", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6715", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ekit", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ekit", "iso_1_code": null, "iso_3_code": "eke", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6721", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Etebi", "iso_1_code": null, "iso_3_code": "etb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6722", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6720", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Enwang-Uda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Enwan", "iso_1_code": null, "iso_3_code": "enw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6724", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uda", "iso_1_code": null, "iso_3_code": "uda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6725", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6723", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ibino", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ibino", "iso_1_code": null, "iso_3_code": "ibn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6726", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ibuoro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ibuoro", "iso_1_code": null, "iso_3_code": "ibr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itu Mbon Uzo", "iso_1_code": null, "iso_3_code": "itm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ito", "iso_1_code": null, "iso_3_code": "itw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nkari", "iso_1_code": null, "iso_3_code": "nkz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6728", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iko", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Iko", "iso_1_code": null, "iso_3_code": "iki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ilue", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ilue", "iso_1_code": null, "iso_3_code": "ilv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6736", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Okobo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Okobo", "iso_1_code": null, "iso_3_code": "okb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6737", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oro", "iso_1_code": null, "iso_3_code": "orx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Usaghade", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Usaghade", "iso_1_code": null, "iso_3_code": "usk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6706", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ogoni", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6706", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ogoni", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gokana", "iso_1_code": null, "iso_3_code": "gkn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6745", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Khana", "iso_1_code": null, "iso_3_code": "ogo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6746", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "T\u00e8\u00e8\u0323 \u0323", "iso_1_code": null, "iso_3_code": "tkq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6744", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baan", "iso_1_code": null, "iso_3_code": "bvj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eleme", "iso_1_code": null, "iso_3_code": "elm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6743", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Upper Cross", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6743", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Upper Cross", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Agoi-Doko-Iyoniyong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Iyongiyong", "iso_1_code": null, "iso_3_code": "bbs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Robambami", "iso_1_code": null, "iso_3_code": "ibm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Deko-Dusanga", "iso_1_code": null, "iso_3_code": "uya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6755", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Akpet", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ukpet-Ehom", "iso_1_code": null, "iso_3_code": "akd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "East-West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ikom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Olulumo-Ikom", "iso_1_code": null, "iso_3_code": "iko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6761", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6760", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loko", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lubila", "iso_1_code": null, "iso_3_code": "kcc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nkukoli", "iso_1_code": null, "iso_3_code": "nbo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lokaa", "iso_1_code": null, "iso_3_code": "yaz", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6765", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6762", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbembe-Legbo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Legbo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Legbo", "iso_1_code": null, "iso_3_code": "agb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6768", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Leyigha", "iso_1_code": null, "iso_3_code": "ayi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lenyima", "iso_1_code": null, "iso_3_code": "ldg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6770", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6767", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbembe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbembe, Cross River", "iso_1_code": null, "iso_3_code": "mfn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6771", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6766", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6759", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "North-South", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6759", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "North-South", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Koring-Kukele", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Koring", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oring", "iso_1_code": null, "iso_3_code": "org", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kukele", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Uzekwe", "iso_1_code": null, "iso_3_code": "eze", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6778", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kukele", "iso_1_code": null, "iso_3_code": "kez", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6779", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6777", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6774", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ubaghara-Kohumono", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kohumono", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hohumono", "iso_1_code": null, "iso_3_code": "bcs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umon", "iso_1_code": null, "iso_3_code": "umm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Agwagwune", "iso_1_code": null, "iso_3_code": "yay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6784", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ubaghara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ubaghara", "iso_1_code": null, "iso_3_code": "byc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6786", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6780", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6773", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6758", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiong-Korop", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kiong", "iso_1_code": null, "iso_3_code": "kkm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6788", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Durop", "iso_1_code": null, "iso_3_code": "krp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Odut", "iso_1_code": null, "iso_3_code": "oda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6682", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Defoid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"yo\")", - "original_lang_name": "yoruba", - "original_lang_code": "yor", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Akokoid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arigidi", "iso_1_code": null, "iso_3_code": "aqg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6793", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6792", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayere-Ahan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "\u00c0h\u00e0n", "iso_1_code": null, "iso_3_code": "ahn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayere", "iso_1_code": null, "iso_3_code": "aye", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6796", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6794", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yoruboid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"yo\")", - "original_lang_name": "yoruba", - "original_lang_code": "yor", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Edekiri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"yo\")", - "original_lang_name": "yoruba", - "original_lang_code": "yor", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ede Cabe", "iso_1_code": null, "iso_3_code": "cbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ede Ica", "iso_1_code": null, "iso_3_code": "ica", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6800", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ede Idaca", "iso_1_code": null, "iso_3_code": "idd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "If\u00e8", "iso_1_code": null, "iso_3_code": "ife", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"yo\")", "original_lang_name": "yoruba", "original_lang_code": "yor", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6802", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ede Ije", "iso_1_code": null, "iso_3_code": "ijj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Isekiri", "iso_1_code": null, "iso_3_code": "its", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6804", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lucumi", "iso_1_code": null, "iso_3_code": "luq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mokole", "iso_1_code": null, "iso_3_code": "mkl", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"yo\")", "original_lang_name": "yoruba", "original_lang_code": "yor", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6806", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nago, Southern", "iso_1_code": null, "iso_3_code": "nqg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6807", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ede Nago, Kura", "iso_1_code": null, "iso_3_code": "nqk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6808", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ulukwumi", "iso_1_code": null, "iso_3_code": "ulb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6809", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nago, Northern", "iso_1_code": null, "iso_3_code": "xkb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yoruba", "iso_1_code": "yo", "iso_3_code": "yor", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"yo\")", "original_lang_name": "yoruba", "original_lang_code": "yor", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6811", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Igala", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Igala", "iso_1_code": null, "iso_3_code": "igl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6813", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6797", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"yo\")", + "original_lang_name": "yoruba", + "original_lang_code": "yor", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Edoid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Delta", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Degema", "iso_1_code": null, "iso_3_code": "deg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6816", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Engenni", "iso_1_code": null, "iso_3_code": "enn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6817", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Epie", "iso_1_code": null, "iso_3_code": "epi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ihievbe", "iso_1_code": null, "iso_3_code": "ihi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ikhin-Aokho", "iso_1_code": null, "iso_3_code": "ikh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Edo-Esan-Ora", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Edo", "iso_1_code": null, "iso_3_code": "bin", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6823", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Emai-Iuleha-Ora", "iso_1_code": null, "iso_3_code": "ema", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Esan", "iso_1_code": null, "iso_3_code": "ish", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6825", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "6822", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ghotuo-Uneme-Yekhee", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6822", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ghotuo-Uneme-Yekhee", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ghotuo", "iso_1_code": null, "iso_3_code": "aaa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ivbie North-Okpela-Arhe", "iso_1_code": null, "iso_3_code": "atg", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6828", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Enwan", "iso_1_code": null, "iso_3_code": "env", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Etsako", "iso_1_code": null, "iso_3_code": "ets", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Igwe", "iso_1_code": null, "iso_3_code": "igw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ikpeshi", "iso_1_code": null, "iso_3_code": "ikp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6832", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ososo", "iso_1_code": null, "iso_3_code": "oso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sasaru", "iso_1_code": null, "iso_3_code": "sxs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uneme", "iso_1_code": null, "iso_3_code": "une", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aduge", "iso_1_code": null, "iso_3_code": "adu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Osse", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ehueun", "iso_1_code": null, "iso_3_code": "ehu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iyayu", "iso_1_code": null, "iso_3_code": "iya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uhami", "iso_1_code": null, "iso_3_code": "uha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6841", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ukue", "iso_1_code": null, "iso_3_code": "uku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6838", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akuku", "iso_1_code": null, "iso_3_code": "ayk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6844", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Idesa", "iso_1_code": null, "iso_3_code": "ids", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Okpe", "iso_1_code": null, "iso_3_code": "okx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oloma", "iso_1_code": null, "iso_3_code": "olm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Okpamheri", "iso_1_code": null, "iso_3_code": "opa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6836", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eruwa", "iso_1_code": null, "iso_3_code": "erh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uvbie", "iso_1_code": null, "iso_3_code": "evh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Isoko", "iso_1_code": null, "iso_3_code": "iso", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6852", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Okpe", "iso_1_code": null, "iso_3_code": "oke", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6853", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Urhobo", "iso_1_code": null, "iso_3_code": "urh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6854", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6814", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Idomoid", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6814", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Idomoid", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Akweya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eloyi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ajiri", "iso_1_code": null, "iso_3_code": "afo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6858", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6857", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Etulo-Idoma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Etulo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Etulo", "iso_1_code": null, "iso_3_code": "utr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6861", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6860", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Idoma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Agatu", "iso_1_code": null, "iso_3_code": "agc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alago", "iso_1_code": null, "iso_3_code": "ala", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Idoma", "iso_1_code": null, "iso_3_code": "idu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6865", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Igede", "iso_1_code": null, "iso_3_code": "ige", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6866", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yala", "iso_1_code": null, "iso_3_code": "yba", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6859", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yatye-Akpa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akweya", "iso_1_code": null, "iso_3_code": "akf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yace", "iso_1_code": null, "iso_3_code": "ekr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6855", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Igboid", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6855", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Igboid", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ekpeye", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ekpeye", "iso_1_code": null, "iso_3_code": "ekp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6873", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6872", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Igbo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ezaa", "iso_1_code": null, "iso_3_code": "eza", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6875", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mgbolizhia", "iso_1_code": null, "iso_3_code": "gmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Igbo", "iso_1_code": "ig", "iso_3_code": "ibo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6877", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ika", "iso_1_code": null, "iso_3_code": "ikk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6878", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ikwere", "iso_1_code": null, "iso_3_code": "ikw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6879", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ikwo", "iso_1_code": null, "iso_3_code": "iqw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6880", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Izii", "iso_1_code": null, "iso_3_code": "izz", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6881", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ogbah", "iso_1_code": null, "iso_3_code": "ogc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6882", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ukwuani-Aboh-Ndoni", "iso_1_code": null, "iso_3_code": "ukw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6883", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6871", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Jukunoid", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6871", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Jukunoid", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bete", "iso_1_code": null, "iso_3_code": "byf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6885", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Jukun-Mbembe-Wurbo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Jukun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Jukun Takum", "iso_1_code": null, "iso_3_code": "jbu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6889", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Jibu", "iso_1_code": null, "iso_3_code": "jib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6890", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "H\u00f5ne", "iso_1_code": null, "iso_3_code": "juh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6891", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "W\u00e3pha", "iso_1_code": null, "iso_3_code": "juw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6892", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6888", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kororofa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wannu", "iso_1_code": null, "iso_3_code": "jub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6894", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wapan", "iso_1_code": null, "iso_3_code": "juk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6895", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jiba", "iso_1_code": null, "iso_3_code": "juo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6896", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6893", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbembe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mbembe, Tigon", "iso_1_code": null, "iso_3_code": "nza", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6898", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6897", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Shoo-Minda-Nye", "iso_1_code": null, "iso_3_code": "bcv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wurbo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karimjo", "iso_1_code": null, "iso_3_code": "cfg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6902", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jiru", "iso_1_code": null, "iso_3_code": "jrr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6903", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tita", "iso_1_code": null, "iso_3_code": "tdq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6904", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6901", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6887", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kpan-Icen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Etkywan", "iso_1_code": null, "iso_3_code": "ich", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kpan", "iso_1_code": null, "iso_3_code": "kpk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6905", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6886", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Yukuben-Kuteb", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6886", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Yukuben-Kuteb", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Akum", "iso_1_code": null, "iso_3_code": "aku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6909", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Beezen", "iso_1_code": null, "iso_3_code": "bnz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6910", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kapya", "iso_1_code": null, "iso_3_code": "klo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6911", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kutep", "iso_1_code": null, "iso_3_code": "kub", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6912", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yukuben", "iso_1_code": null, "iso_3_code": "ybl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6908", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6884", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Kainji", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6884", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Kainji", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amo", "iso_1_code": null, "iso_3_code": "amo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Jos", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Panawa", "iso_1_code": null, "iso_3_code": "pwb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jera", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gamo-Ningi", "iso_1_code": null, "iso_3_code": "bte", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Izora", "iso_1_code": null, "iso_3_code": "cbo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunzuii", "iso_1_code": null, "iso_3_code": "dza", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lere", "iso_1_code": null, "iso_3_code": "gnh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6924", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gyem", "iso_1_code": null, "iso_3_code": "gye", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6925", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jere", "iso_1_code": null, "iso_3_code": "jer", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6926", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Janji", "iso_1_code": null, "iso_3_code": "jni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kudu-Camo", "iso_1_code": null, "iso_3_code": "kov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6928", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lemoro", "iso_1_code": null, "iso_3_code": "ldj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6929", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iguta", "iso_1_code": null, "iso_3_code": "nar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6930", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sheni", "iso_1_code": null, "iso_3_code": "scv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shau", "iso_1_code": null, "iso_3_code": "sqh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6932", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanga", "iso_1_code": null, "iso_3_code": "xsn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6933", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kauru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bina", "iso_1_code": null, "iso_3_code": "byj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6935", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dungu", "iso_1_code": null, "iso_3_code": "dbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6936", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tugbiri-Niragu", "iso_1_code": null, "iso_3_code": "grh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6937", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kizamani", "iso_1_code": null, "iso_3_code": "izm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6938", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaivi", "iso_1_code": null, "iso_3_code": "kce", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6939", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vono", "iso_1_code": null, "iso_3_code": "kch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinuku", "iso_1_code": null, "iso_3_code": "kkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tumi", "iso_1_code": null, "iso_3_code": "kku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kono", "iso_1_code": null, "iso_3_code": "klk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6943", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurama", "iso_1_code": null, "iso_3_code": "krh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rishiwa", "iso_1_code": null, "iso_3_code": "rsw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6945", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mala", "iso_1_code": null, "iso_3_code": "ruy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6946", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ruma", "iso_1_code": null, "iso_3_code": "ruz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6947", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vori", "iso_1_code": null, "iso_3_code": "sde", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6934", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piti-Atsam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Atsam", "iso_1_code": null, "iso_3_code": "cch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6950", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Abishi", "iso_1_code": null, "iso_3_code": "pcn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6951", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Basa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Basa-Gumna", "iso_1_code": null, "iso_3_code": "bsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6954", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bassa-Kontagora", "iso_1_code": null, "iso_3_code": "bsr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6955", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basa-Gurmana", "iso_1_code": null, "iso_3_code": "buj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6956", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Basa", "iso_1_code": null, "iso_3_code": "bzw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6957", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6953", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baushi-Gurmana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bauchi", "iso_1_code": null, "iso_3_code": "bsf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gurmana", "iso_1_code": null, "iso_3_code": "gvm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6958", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gwamhi-Wuri", "iso_1_code": null, "iso_3_code": "bga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6962", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Damakawa", "iso_1_code": null, "iso_3_code": "dam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6963", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "C\u2019Lela", "iso_1_code": null, "iso_3_code": "dri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6964", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "ut-Ma\u2019in", "iso_1_code": null, "iso_3_code": "gel", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6965", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "us-Saare", "iso_1_code": null, "iso_3_code": "uss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6966", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "ut-Hun", "iso_1_code": null, "iso_3_code": "uth", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6967", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6961", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kainji Lake", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tsucuba", "iso_1_code": null, "iso_3_code": "cbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laru", "iso_1_code": null, "iso_3_code": "lan", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lopa", "iso_1_code": null, "iso_3_code": "lop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6971", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6968", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kambari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Cishingini", "iso_1_code": null, "iso_3_code": "asg", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6973", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Baangi", "iso_1_code": null, "iso_3_code": "bqx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsikimba", "iso_1_code": null, "iso_3_code": "kdl", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6975", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tsishingini", "iso_1_code": null, "iso_3_code": "tsw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6976", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tsuvadi", "iso_1_code": null, "iso_3_code": "tvd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6977", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6972", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamuku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Acipa, Eastern", "iso_1_code": null, "iso_3_code": "acp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6979", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cicipu", "iso_1_code": null, "iso_3_code": "awc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6980", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamuku", "iso_1_code": null, "iso_3_code": "cdr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6981", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cahungwarya", "iso_1_code": null, "iso_3_code": "nat", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6982", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pangu", "iso_1_code": null, "iso_3_code": "png", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6983", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rogo", "iso_1_code": null, "iso_3_code": "rod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shama-Sambuga", "iso_1_code": null, "iso_3_code": "sqa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6985", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fungwa", "iso_1_code": null, "iso_3_code": "ula", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6986", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6978", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Reshe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Reshe", "iso_1_code": null, "iso_3_code": "res", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6988", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6987", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6952", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "6914", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nupoid", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "6914", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nupoid", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ebira-Gade", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gade", "iso_1_code": null, "iso_3_code": "ged", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6991", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ebira", "iso_1_code": null, "iso_3_code": "igb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nupe-Gbagyi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dibo", "iso_1_code": null, "iso_3_code": "dio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbagyi-Gbari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gbagyi", "iso_1_code": null, "iso_3_code": "gbr", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "6996", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gbari", "iso_1_code": null, "iso_3_code": "gby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6997", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6995", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nupe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asu", "iso_1_code": null, "iso_3_code": "aum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "6999", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gupa-Abawa", "iso_1_code": null, "iso_3_code": "gpa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kakanda", "iso_1_code": null, "iso_3_code": "kka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7001", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kami", "iso_1_code": null, "iso_3_code": "kmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7002", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kupa", "iso_1_code": null, "iso_3_code": "kug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7003", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nupe-Nupe-Tako", "iso_1_code": null, "iso_3_code": "nup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "6998", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6993", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "6989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oko", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oko-Eni-Osayen", "iso_1_code": null, "iso_3_code": "oks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7006", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7005", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Plateau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Atoro", "iso_1_code": null, "iso_3_code": "tdv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7008", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alumic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arum", "iso_1_code": null, "iso_3_code": "aab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ayu", "iso_1_code": null, "iso_3_code": "ayu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7012", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7011", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Beromic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Berom", "iso_1_code": null, "iso_3_code": "bom", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7014", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Iten", "iso_1_code": null, "iso_3_code": "etx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shall-Zwall", "iso_1_code": null, "iso_3_code": "sha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7016", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7013", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Central", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7013", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Central", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "North-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cara", "iso_1_code": null, "iso_3_code": "cfd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7019", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ibaas", "iso_1_code": null, "iso_3_code": "cen", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7021", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Firan", "iso_1_code": null, "iso_3_code": "fir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7022", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ganang", "iso_1_code": null, "iso_3_code": "gne", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7023", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rigwe", "iso_1_code": null, "iso_3_code": "iri", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7024", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Izere", "iso_1_code": null, "iso_3_code": "izr", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7025", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Jju", "iso_1_code": null, "iso_3_code": "kaj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7026", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tyap", "iso_1_code": null, "iso_3_code": "kcg", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7027", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ahwai", "iso_1_code": null, "iso_3_code": "nfd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7029", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7028", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ejuele", "iso_1_code": null, "iso_3_code": "dbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ajiya", "iso_1_code": null, "iso_3_code": "idc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ikulu", "iso_1_code": null, "iso_3_code": "ikl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7033", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iku-Gora-Ankwa", "iso_1_code": null, "iso_3_code": "ikv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7034", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Adara", "iso_1_code": null, "iso_3_code": "kad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuturmi", "iso_1_code": null, "iso_3_code": "khj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7030", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Horom", "iso_1_code": null, "iso_3_code": "hoe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bo-Rukul", "iso_1_code": null, "iso_3_code": "mae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pyam", "iso_1_code": null, "iso_3_code": "pym", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7040", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7037", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Migili", "iso_1_code": null, "iso_3_code": "mgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7042", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rjili", "iso_1_code": null, "iso_3_code": "uji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7043", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koro Nulu", "iso_1_code": null, "iso_3_code": "vkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7044", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koro Zuba", "iso_1_code": null, "iso_3_code": "vkz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7045", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7041", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarokoid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yangkam", "iso_1_code": null, "iso_3_code": "bsx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7047", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pye", "iso_1_code": null, "iso_3_code": "pai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7048", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kusur-Myet", "iso_1_code": null, "iso_3_code": "tdl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7049", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarok", "iso_1_code": null, "iso_3_code": "yer", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hyamic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kyoli", "iso_1_code": null, "iso_3_code": "cry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hyam", "iso_1_code": null, "iso_3_code": "jab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gyong", "iso_1_code": null, "iso_3_code": "kdm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7056", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shamang", "iso_1_code": null, "iso_3_code": "xsh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7057", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhire", "iso_1_code": null, "iso_3_code": "zhi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7058", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ashe", "iso_1_code": null, "iso_3_code": "ahs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koro Wachi", "iso_1_code": null, "iso_3_code": "bqv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7061", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duya", "iso_1_code": null, "iso_3_code": "ldb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7062", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyankpa", "iso_1_code": null, "iso_3_code": "yes", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7063", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7059", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7052", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Aninka", "iso_1_code": null, "iso_3_code": "aqk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7066", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kadung", "iso_1_code": null, "iso_3_code": "dkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7067", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bijim", "iso_1_code": null, "iso_3_code": "jbm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7068", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bu", "iso_1_code": null, "iso_3_code": "jid", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7069", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamantan", "iso_1_code": null, "iso_3_code": "kci", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7070", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nikyob-Nindem", "iso_1_code": null, "iso_3_code": "kdp", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7071", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kanufi", "iso_1_code": null, "iso_3_code": "kni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7072", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mada", "iso_1_code": null, "iso_3_code": "mda", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7073", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Numana", "iso_1_code": null, "iso_3_code": "nbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7074", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ninzo", "iso_1_code": null, "iso_3_code": "nin", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7075", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nungu", "iso_1_code": null, "iso_3_code": "rin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7076", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuce", "iso_1_code": null, "iso_3_code": "ruk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7077", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiyaa", "iso_1_code": null, "iso_3_code": "tyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7078", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7065", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "B", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akye", "iso_1_code": null, "iso_3_code": "aik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7080", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eggon", "iso_1_code": null, "iso_3_code": "ego", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7081", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sambe", "iso_1_code": null, "iso_3_code": "xab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7082", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hasha", "iso_1_code": null, "iso_3_code": "ybj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7083", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7064", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7051", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7007", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ukaan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ukaan", "iso_1_code": null, "iso_3_code": "kcf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7085", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fali of Baissa", "iso_1_code": null, "iso_3_code": "fah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7087", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7086", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5449", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Dogon", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5449", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Dogon", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dogon, Ampari", "iso_1_code": null, "iso_3_code": "aqd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7089", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Dogul Dom", "iso_1_code": null, "iso_3_code": "dbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Ben Tey", "iso_1_code": null, "iso_3_code": "dbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7091", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Bondum Dom", "iso_1_code": null, "iso_3_code": "dbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7092", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Bankan Tey", "iso_1_code": null, "iso_3_code": "dbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7093", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Donno So", "iso_1_code": null, "iso_3_code": "dds", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Bunoge", "iso_1_code": null, "iso_3_code": "dgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7095", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Jamsay", "iso_1_code": null, "iso_3_code": "djm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Mombo", "iso_1_code": null, "iso_3_code": "dmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7097", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Ana Tinga", "iso_1_code": null, "iso_3_code": "dti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7098", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Tene Kan", "iso_1_code": null, "iso_3_code": "dtk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7099", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Tomo Kan", "iso_1_code": null, "iso_3_code": "dtm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Tommo So", "iso_1_code": null, "iso_3_code": "dto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Toro So", "iso_1_code": null, "iso_3_code": "dts", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7102", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dogon, Toro Tegu", "iso_1_code": null, "iso_3_code": "dtt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7103", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Tebul Ure", "iso_1_code": null, "iso_3_code": "dtu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Yanda Dom", "iso_1_code": null, "iso_3_code": "dym", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Nanga Dama", "iso_1_code": null, "iso_3_code": "nzz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dogon, Tiranige Diga", "iso_1_code": null, "iso_3_code": "tde", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7088", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Kru", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7088", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Kru", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Aizi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aizi, Tiagbamrin", "iso_1_code": null, "iso_3_code": "ahi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aizi, Mobumrin", "iso_1_code": null, "iso_3_code": "ahm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7111", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aizi, Aproumu", "iso_1_code": null, "iso_3_code": "ahp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7112", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7109", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bakwe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bakw\u00e9", "iso_1_code": null, "iso_3_code": "bjw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7115", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wan\u00e9", "iso_1_code": null, "iso_3_code": "hwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7116", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bete", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "B\u00e9t\u00e9, Gagnoa", "iso_1_code": null, "iso_3_code": "btg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kouya", "iso_1_code": null, "iso_3_code": "kyf", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7120", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "B\u00e9t\u00e9, Guiberoua", "iso_1_code": null, "iso_3_code": "bet", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "B\u00e9t\u00e9, Daloa", "iso_1_code": null, "iso_3_code": "bev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Godi\u00e9", "iso_1_code": null, "iso_3_code": "god", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7117", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Dida", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7117", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Dida", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dida, Lakota", "iso_1_code": null, "iso_3_code": "dic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gu\u00e9bie", "iso_1_code": null, "iso_3_code": "gie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dida, Yocobou\u00e9", "iso_1_code": null, "iso_3_code": "gud", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7128", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Neyo", "iso_1_code": null, "iso_3_code": "ney", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7125", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwadia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kodia", "iso_1_code": null, "iso_3_code": "kwp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7113", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Kuwaa", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7113", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Kuwaa", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kuwaa", "iso_1_code": null, "iso_3_code": "blh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7133", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seme", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Siamou", "iso_1_code": null, "iso_3_code": "sif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bassa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bassa", "iso_1_code": null, "iso_3_code": "bsq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7138", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dewoin", "iso_1_code": null, "iso_3_code": "dee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7139", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbii", "iso_1_code": null, "iso_3_code": "ggb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7137", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Grebo", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7137", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Grebo", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Glio-Oubi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Glio-Oubi", "iso_1_code": null, "iso_3_code": "oub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7143", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ivorian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Krumen, Plapo", "iso_1_code": null, "iso_3_code": "ktj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7145", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Krumen, Pye", "iso_1_code": null, "iso_3_code": "pye", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Krumen, Tepo", "iso_1_code": null, "iso_3_code": "ted", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7147", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7144", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Liberian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7144", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Liberian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Grebo, Northern", "iso_1_code": null, "iso_3_code": "gbo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7149", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Grebo, Gboloo", "iso_1_code": null, "iso_3_code": "gec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7150", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Grebo, Southern", "iso_1_code": null, "iso_3_code": "grj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Grebo, Central", "iso_1_code": null, "iso_3_code": "grv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7152", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Grebo, Barclayville", "iso_1_code": null, "iso_3_code": "gry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7153", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7141", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Klao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Klao", "iso_1_code": null, "iso_3_code": "klu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tajuasohn", "iso_1_code": null, "iso_3_code": "tja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7156", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7154", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wee", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Guere-Krahn", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Daho-Doo", "iso_1_code": null, "iso_3_code": "das", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Glaro-Twabo", "iso_1_code": null, "iso_3_code": "glr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7160", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "W\u00e8 Southern", "iso_1_code": null, "iso_3_code": "gxx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7161", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sapo", "iso_1_code": null, "iso_3_code": "krn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Krahn, Western", "iso_1_code": null, "iso_3_code": "krw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "W\u00e8 Western", "iso_1_code": null, "iso_3_code": "wec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7164", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konobo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Krahn, Eastern", "iso_1_code": null, "iso_3_code": "kqo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7166", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7165", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyabwa", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7165", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyabwa", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Nyabwa", "iso_1_code": null, "iso_3_code": "nwb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7168", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7167", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Wobe", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7167", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Wobe", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "W\u00e8 Northern", "iso_1_code": null, "iso_3_code": "wob", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7170", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" } - ], + }, "node_i": "7136", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7108", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Kwa", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7108", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Kwa", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Esuma", "iso_1_code": null, "iso_3_code": "esm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Boro", "iso_1_code": null, "iso_3_code": "xxb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7173", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Left Bank", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Avatime-Nyangbo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Avatime", "iso_1_code": null, "iso_3_code": "avn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7176", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nyagbo", "iso_1_code": null, "iso_3_code": "nyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tafi", "iso_1_code": null, "iso_3_code": "tcd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7175", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Gbe", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7175", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Gbe", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Aguna", "iso_1_code": null, "iso_3_code": "aug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7180", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbe, Ci", "iso_1_code": null, "iso_3_code": "cib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "\u00c9w\u00e9", "iso_1_code": "ee", "iso_3_code": "ewe", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7182", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gbe, Gbesi", "iso_1_code": null, "iso_3_code": "gbs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbe, Eastern Xwla", "iso_1_code": null, "iso_3_code": "gbx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kpessi", "iso_1_code": null, "iso_3_code": "kef", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbe, Kotafon", "iso_1_code": null, "iso_3_code": "kqk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7186", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbe, Saxwe", "iso_1_code": null, "iso_3_code": "sxw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbe, Waci", "iso_1_code": null, "iso_3_code": "wci", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wudu", "iso_1_code": null, "iso_3_code": "wud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbe, Xwela", "iso_1_code": null, "iso_3_code": "xwe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbe, Western Xwla", "iso_1_code": null, "iso_3_code": "xwl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aja", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Aja", "iso_1_code": null, "iso_3_code": "ajg", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7193", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gbe, Ayizo", "iso_1_code": null, "iso_3_code": "ayb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbe, Defi", "iso_1_code": null, "iso_3_code": "gbh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gun", "iso_1_code": null, "iso_3_code": "guw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7196", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gbe, Tofin", "iso_1_code": null, "iso_3_code": "tfi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbe, Weme", "iso_1_code": null, "iso_3_code": "wem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7192", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Fon", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7192", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Fon", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Fon", "iso_1_code": null, "iso_3_code": "fon", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7200", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gbe, Maxi", "iso_1_code": null, "iso_3_code": "mxl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7199", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mina", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7199", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mina", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Gen", "iso_1_code": null, "iso_3_code": "gej", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7203", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7202", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7179", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kebu-Animere", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Animere", "iso_1_code": null, "iso_3_code": "anf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Akebu", "iso_1_code": null, "iso_3_code": "keu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kposo-Ahlo-Bowili", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Adangbe", "iso_1_code": null, "iso_3_code": "adq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Igo", "iso_1_code": null, "iso_3_code": "ahl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuwuli", "iso_1_code": null, "iso_3_code": "bov", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7210", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ikposo", "iso_1_code": null, "iso_3_code": "kpo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7174", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nyo", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7174", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nyo", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Agneby", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ab\u00e9", "iso_1_code": null, "iso_3_code": "aba", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7214", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Abidji", "iso_1_code": null, "iso_3_code": "abi", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7215", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Adioukrou", "iso_1_code": null, "iso_3_code": "adj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7216", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7213", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Attie", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7213", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Attie", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Atti\u00e9", "iso_1_code": null, "iso_3_code": "ati", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7218", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7217", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Avikam-Alladian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7217", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Avikam-Alladian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Alladian", "iso_1_code": null, "iso_3_code": "ald", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7220", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Avikam", "iso_1_code": null, "iso_3_code": "avi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7221", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7219", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ga-Dangme", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7219", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ga-Dangme", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dangme", "iso_1_code": null, "iso_3_code": "ada", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7223", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ga", "iso_1_code": null, "iso_3_code": "gaa", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7224", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7222", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Potou-Tano", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7222", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Potou-Tano", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Basila-Adele", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Adele", "iso_1_code": null, "iso_3_code": "ade", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7227", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Anii", "iso_1_code": null, "iso_3_code": "blo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ega", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ega", "iso_1_code": null, "iso_3_code": "ega", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7229", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lelemi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lelemi-Akpafu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Siwu", "iso_1_code": null, "iso_3_code": "akp", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7233", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lelemi", "iso_1_code": null, "iso_3_code": "lef", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7234", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7232", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Likpe-Santrokofi", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7232", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Likpe-Santrokofi", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Sekpele", "iso_1_code": null, "iso_3_code": "lip", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7236", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Selee", "iso_1_code": null, "iso_3_code": "snw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7237", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Logba", "iso_1_code": null, "iso_3_code": "lgq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7238", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Potou", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tchaman", "iso_1_code": null, "iso_3_code": "ebr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7241", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbato", "iso_1_code": null, "iso_3_code": "gwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7242", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tano", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Akan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abron", "iso_1_code": null, "iso_3_code": "abr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Akan", "iso_1_code": "ak", "iso_3_code": "aka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wasa", "iso_1_code": null, "iso_3_code": "wss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Northern", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Anyin", "iso_1_code": null, "iso_3_code": "any", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7251", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Baoul\u00e9", "iso_1_code": null, "iso_3_code": "bci", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7252", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Anufo", "iso_1_code": null, "iso_3_code": "cko", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7253", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Anyin Morofo", "iso_1_code": null, "iso_3_code": "mtb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Esahie", "iso_1_code": null, "iso_3_code": "sfw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7255", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7250", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7250", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ahanta", "iso_1_code": null, "iso_3_code": "aha", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7257", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Jwira-Pepesa", "iso_1_code": null, "iso_3_code": "jwi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nzema", "iso_1_code": null, "iso_3_code": "nzi", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7259", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7244", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Guang", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7244", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Guang", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "North Guang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gikyode", "iso_1_code": null, "iso_3_code": "acd", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7262", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ginyanga", "iso_1_code": null, "iso_3_code": "ayg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7263", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tchumbuli", "iso_1_code": null, "iso_3_code": "bqa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7264", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dompo", "iso_1_code": null, "iso_3_code": "doy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7265", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Foodo", "iso_1_code": null, "iso_3_code": "fod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gonja", "iso_1_code": null, "iso_3_code": "gjn", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7267", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kplang", "iso_1_code": null, "iso_3_code": "kph", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Krache", "iso_1_code": null, "iso_3_code": "kye", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7269", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nawuri", "iso_1_code": null, "iso_3_code": "naw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7270", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chumburung", "iso_1_code": null, "iso_3_code": "ncu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7271", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nkonya", "iso_1_code": null, "iso_3_code": "nko", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7272", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nkami", "iso_1_code": null, "iso_3_code": "nkq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7273", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nchumbulu", "iso_1_code": null, "iso_3_code": "nlu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dwang", "iso_1_code": null, "iso_3_code": "nnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7275", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Guang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awutu", "iso_1_code": null, "iso_3_code": "afu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7277", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cherepon", "iso_1_code": null, "iso_3_code": "cpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gua", "iso_1_code": null, "iso_3_code": "gwx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7279", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Larteh", "iso_1_code": null, "iso_3_code": "lar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Krobu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Krobu", "iso_1_code": null, "iso_3_code": "kxb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abure", "iso_1_code": null, "iso_3_code": "abu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7284", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Beti", "iso_1_code": null, "iso_3_code": "eot", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7285", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7283", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7171", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "North", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7171", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "North", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Adamawa-Ubangi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Adamawa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Fali", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Fali, South", "iso_1_code": null, "iso_3_code": "fal", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7290", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Fali, North", "iso_1_code": null, "iso_3_code": "fll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7291", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" } - ], + }, "node_i": "7289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kam", "iso_1_code": null, "iso_3_code": "kdx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwa", "iso_1_code": null, "iso_3_code": "kwb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7294", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "La\u2019bi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "La\u2019bi", "iso_1_code": null, "iso_3_code": "lbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7297", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Leko-Nimbari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Duru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dii", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Duupa", "iso_1_code": null, "iso_3_code": "dae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7301", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dii", "iso_1_code": null, "iso_3_code": "dur", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7302", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dugun", "iso_1_code": null, "iso_3_code": "ndu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7303", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Duli-Gey", "iso_1_code": null, "iso_3_code": "duz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Voko-Dowayo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kutin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Pere", "iso_1_code": null, "iso_3_code": "pfe", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7308", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7307", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Vere-Dowayo", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7307", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Vere-Dowayo", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dowayo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Doyayo", "iso_1_code": null, "iso_3_code": "dow", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7311", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7310", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Vere-Gimme", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7310", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Vere-Gimme", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Gimme", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gimnime", "iso_1_code": null, "iso_3_code": "gmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7314", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gimme", "iso_1_code": null, "iso_3_code": "kmp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7315", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vere", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Koma", "iso_1_code": null, "iso_3_code": "kmy", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7317", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Verre", "iso_1_code": null, "iso_3_code": "ver", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Voko", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Longto", "iso_1_code": null, "iso_3_code": "wok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7299", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Leko", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7299", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Leko", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kolbila", "iso_1_code": null, "iso_3_code": "klc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mubako", "iso_1_code": null, "iso_3_code": "muo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samba Leko", "iso_1_code": null, "iso_3_code": "ndi", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7324", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wom", "iso_1_code": null, "iso_3_code": "wom", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7321", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mumuye-Yandang", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7321", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mumuye-Yandang", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Mumuye", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gengle", "iso_1_code": null, "iso_3_code": "geg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kumba", "iso_1_code": null, "iso_3_code": "ksm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mumuye", "iso_1_code": null, "iso_3_code": "mzm", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7330", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pangseng", "iso_1_code": null, "iso_3_code": "pgs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rang", "iso_1_code": null, "iso_3_code": "rax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teme", "iso_1_code": null, "iso_3_code": "tdo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waka", "iso_1_code": null, "iso_3_code": "wav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yandang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bali", "iso_1_code": null, "iso_3_code": "bcn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7336", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kugama", "iso_1_code": null, "iso_3_code": "kow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kpasham", "iso_1_code": null, "iso_3_code": "pbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yendang", "iso_1_code": null, "iso_3_code": "ynq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yotti", "iso_1_code": null, "iso_3_code": "yot", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nimbari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nimbari", "iso_1_code": null, "iso_3_code": "nmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7298", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mbum-Day", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7298", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mbum-Day", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bua", "iso_1_code": null, "iso_3_code": "bub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7345", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bolgo", "iso_1_code": null, "iso_3_code": "bvo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fania", "iso_1_code": null, "iso_3_code": "fni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bon Gula", "iso_1_code": null, "iso_3_code": "glc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gula Iro", "iso_1_code": null, "iso_3_code": "glj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koke", "iso_1_code": null, "iso_3_code": "kou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Niellim", "iso_1_code": null, "iso_3_code": "nie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Noy", "iso_1_code": null, "iso_3_code": "noy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunia", "iso_1_code": null, "iso_3_code": "tug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zan Gula", "iso_1_code": null, "iso_3_code": "zna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7344", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Day", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Day", "iso_1_code": null, "iso_3_code": "dai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kim", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Besme", "iso_1_code": null, "iso_3_code": "bes", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7358", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Goundo", "iso_1_code": null, "iso_3_code": "goy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7359", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kim", "iso_1_code": null, "iso_3_code": "kia", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7360", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7357", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mbum", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7357", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mbum", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Eastern Mbum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Karang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kare", "iso_1_code": null, "iso_3_code": "kbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karang", "iso_1_code": null, "iso_3_code": "kzr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nzakambay", "iso_1_code": null, "iso_3_code": "nzy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pana", "iso_1_code": null, "iso_3_code": "pnz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7367", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7363", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kuo", "iso_1_code": null, "iso_3_code": "xuo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7369", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7362", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Northern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7362", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Northern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dama-Galke", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dama", "iso_1_code": null, "iso_3_code": "dmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7372", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndai", "iso_1_code": null, "iso_3_code": "gke", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7373", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mono", "iso_1_code": null, "iso_3_code": "mru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7374", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tupuri-Mambai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mambai", "iso_1_code": null, "iso_3_code": "mcs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7376", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mundang", "iso_1_code": null, "iso_3_code": "mua", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7377", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tupuri", "iso_1_code": null, "iso_3_code": "tui", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7378", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7375", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7370", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbum", "iso_1_code": null, "iso_3_code": "mdd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7379", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dek", "iso_1_code": null, "iso_3_code": "dek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pam", "iso_1_code": null, "iso_3_code": "pmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7383", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "To", "iso_1_code": null, "iso_3_code": "toz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7384", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oblo", "iso_1_code": null, "iso_3_code": "obl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7385", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waja-Jen", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Jen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kyak", "iso_1_code": null, "iso_3_code": "bka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7389", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burak", "iso_1_code": null, "iso_3_code": "bys", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7390", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "M\u00e1ghd\u00ec", "iso_1_code": null, "iso_3_code": "gmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moo", "iso_1_code": null, "iso_3_code": "gwg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dza", "iso_1_code": null, "iso_3_code": "jen", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7393", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Leelau", "iso_1_code": null, "iso_3_code": "ldk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7394", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loo", "iso_1_code": null, "iso_3_code": "ldo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7395", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mingang Doso", "iso_1_code": null, "iso_3_code": "mko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7396", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mak", "iso_1_code": null, "iso_3_code": "pbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7397", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tha", "iso_1_code": null, "iso_3_code": "thy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Longuda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Longuda", "iso_1_code": null, "iso_3_code": "lnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7400", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waja", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Awak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awak", "iso_1_code": null, "iso_3_code": "awo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamo", "iso_1_code": null, "iso_3_code": "kcq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7404", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cham-Mona", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cham", "iso_1_code": null, "iso_3_code": "cfa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tso", "iso_1_code": null, "iso_3_code": "ldp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dadiya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dadiya", "iso_1_code": null, "iso_3_code": "dbd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7409", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7408", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tula", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bangwinji", "iso_1_code": null, "iso_3_code": "bsj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7411", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tula", "iso_1_code": null, "iso_3_code": "tul", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7412", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Waja", "iso_1_code": null, "iso_3_code": "wja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7401", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yungur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Libo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaan", "iso_1_code": null, "iso_3_code": "ldl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7416", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7415", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mboi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mboi", "iso_1_code": null, "iso_3_code": "moi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yungur-Roba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lala-Roba", "iso_1_code": null, "iso_3_code": "lla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Voro", "iso_1_code": null, "iso_3_code": "vor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7421", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bena", "iso_1_code": null, "iso_3_code": "yun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7422", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7414", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7387", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7288", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ubangi", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7288", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ubangi", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Banda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central Core", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda-Bambari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda-Bambari", "iso_1_code": null, "iso_3_code": "liy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Banda-Banda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda-Banda", "iso_1_code": null, "iso_3_code": "bpd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7430", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Banda-Mbres", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda-Mbr\u00e8s", "iso_1_code": null, "iso_3_code": "bqk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7431", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Banda-Ndele", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda-Nd\u00e9l\u00e9", "iso_1_code": null, "iso_3_code": "bfl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mid-Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda, Mid-Southern", "iso_1_code": null, "iso_3_code": "bjo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gobu", "iso_1_code": null, "iso_3_code": "gox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kpagua", "iso_1_code": null, "iso_3_code": "kuw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7438", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mono", "iso_1_code": null, "iso_3_code": "mnh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7439", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngundu", "iso_1_code": null, "iso_3_code": "nue", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Togbo-Vara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda, Togbo-Vara", "iso_1_code": null, "iso_3_code": "tor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda-Yangere", "iso_1_code": null, "iso_3_code": "yaj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Langbashe", "iso_1_code": null, "iso_3_code": "lna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7446", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Banda, South Central", "iso_1_code": null, "iso_3_code": "lnl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7445", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbandja", "iso_1_code": null, "iso_3_code": "zmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7448", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ngbundu", "iso_1_code": null, "iso_3_code": "nuu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7450", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda, West Central", "iso_1_code": null, "iso_3_code": "bbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7453", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbaya-Manza-Ngbaka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Suma", "iso_1_code": null, "iso_3_code": "sqm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bhogoto", "iso_1_code": null, "iso_3_code": "bdt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7457", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbaya-Bossangoa", "iso_1_code": null, "iso_3_code": "gbp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbaya-Bozoum", "iso_1_code": null, "iso_3_code": "gbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbanu", "iso_1_code": null, "iso_3_code": "gbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ali", "iso_1_code": null, "iso_3_code": "aiy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bofi", "iso_1_code": null, "iso_3_code": "bff", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandja", "iso_1_code": null, "iso_3_code": "mzv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngbaka", "iso_1_code": null, "iso_3_code": "nga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngbaka Manza", "iso_1_code": null, "iso_3_code": "ngg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7461", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northwest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gbaya, Northwest", "iso_1_code": null, "iso_3_code": "gya", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7468", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7467", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southwest", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7467", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southwest", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bangandu", "iso_1_code": null, "iso_3_code": "bgf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbaya-Mbodomo", "iso_1_code": null, "iso_3_code": "gmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7471", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbaya, Southwest", "iso_1_code": null, "iso_3_code": "gso", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7472", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngombe", "iso_1_code": null, "iso_3_code": "nmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7469", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7454", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ngbandi", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7454", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ngbandi", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dendi", "iso_1_code": null, "iso_3_code": "deq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbayi", "iso_1_code": null, "iso_3_code": "gyg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbangi", "iso_1_code": null, "iso_3_code": "mgn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7477", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngbandi, Southern", "iso_1_code": null, "iso_3_code": "nbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7478", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngbandi, Northern", "iso_1_code": null, "iso_3_code": "ngb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7479", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yakoma", "iso_1_code": null, "iso_3_code": "yky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7474", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Sere-Ngbaka-Mba", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7474", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Sere-Ngbaka-Mba", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ngbaka-Mba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dongo", "iso_1_code": null, "iso_3_code": "doo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mba", "iso_1_code": null, "iso_3_code": "mfc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ma", "iso_1_code": null, "iso_3_code": "msj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7486", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndunga", "iso_1_code": null, "iso_3_code": "ndt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7487", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7483", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngbaka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mayogo-Bangba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bangba", "iso_1_code": null, "iso_3_code": "bbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mayogo", "iso_1_code": null, "iso_3_code": "mdm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7490", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mundu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "M\u00fcnd\u00fc", "iso_1_code": null, "iso_3_code": "muh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7494", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baka-Gundi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baka", "iso_1_code": null, "iso_3_code": "bkc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7497", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Limassa", "iso_1_code": null, "iso_3_code": "bme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7498", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gundi", "iso_1_code": null, "iso_3_code": "gdi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7499", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ganzi", "iso_1_code": null, "iso_3_code": "gnz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7500", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7496", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bwaka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gilima", "iso_1_code": null, "iso_3_code": "gix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngbaka Ma\u2019bo", "iso_1_code": null, "iso_3_code": "nbm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7501", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbanzili", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Buraka", "iso_1_code": null, "iso_3_code": "bkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbanziri", "iso_1_code": null, "iso_3_code": "gbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7504", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Monzombo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kpala", "iso_1_code": null, "iso_3_code": "kpl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Monzombo", "iso_1_code": null, "iso_3_code": "moj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yango", "iso_1_code": null, "iso_3_code": "yng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7495", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7482", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Sere", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7482", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Sere", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Feroge-Mangaya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Feroge", "iso_1_code": null, "iso_3_code": "fer", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangayat", "iso_1_code": null, "iso_3_code": "myj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Indri-Togoyo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Indri", "iso_1_code": null, "iso_3_code": "idr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Togoyo", "iso_1_code": null, "iso_3_code": "tgy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sere-Bviri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bai-Viri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bai", "iso_1_code": null, "iso_3_code": "bdj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Belanda Viri", "iso_1_code": null, "iso_3_code": "bvi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7519", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndogo-Sere", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ndogo", "iso_1_code": null, "iso_3_code": "ndz", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7523", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sere", "iso_1_code": null, "iso_3_code": "swf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tagbu", "iso_1_code": null, "iso_3_code": "tbm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7518", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" } - ], + }, "node_i": "7511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7481", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Zande", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7481", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Zande", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Barambo-Pambia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barambu", "iso_1_code": null, "iso_3_code": "brm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pambia", "iso_1_code": null, "iso_3_code": "pmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7529", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zande-Nzakara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Geme", "iso_1_code": null, "iso_3_code": "geq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kpatili", "iso_1_code": null, "iso_3_code": "kym", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nzakara", "iso_1_code": null, "iso_3_code": "nzk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zande", "iso_1_code": null, "iso_3_code": "zne", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7534", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7526", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7287", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Gur", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7287", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Gur", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bariba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Baatonum", "iso_1_code": null, "iso_3_code": "bba", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7537", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7536", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Central", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7536", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Central", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bwamu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bomu", "iso_1_code": null, "iso_3_code": "bmq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7541", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Buamu", "iso_1_code": null, "iso_3_code": "box", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7542", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bwamu, L\u00e1\u00e1 L\u00e1\u00e1", "iso_1_code": null, "iso_3_code": "bwj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bwamu, Cwi", "iso_1_code": null, "iso_3_code": "bwy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7544", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurumfe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Koromf\u00e9", "iso_1_code": null, "iso_3_code": "kfz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7546", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oti-Volta", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Buli-Koma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Buli", "iso_1_code": null, "iso_3_code": "bwu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7549", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Konni", "iso_1_code": null, "iso_3_code": "kma", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7550", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7548", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Eastern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7548", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Eastern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Biali", "iso_1_code": null, "iso_3_code": "beh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mbelime", "iso_1_code": null, "iso_3_code": "mql", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7553", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ditammari", "iso_1_code": null, "iso_3_code": "tbz", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7554", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Waama", "iso_1_code": null, "iso_3_code": "wwa", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7555", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7551", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Gurma", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7551", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Gurma", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ngangam", "iso_1_code": null, "iso_3_code": "gng", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7557", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gourmanch\u00e9ma", "iso_1_code": null, "iso_3_code": "gux", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7558", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nateni", "iso_1_code": null, "iso_3_code": "ntm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7559", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miyobe", "iso_1_code": null, "iso_3_code": "soy", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7560", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Konkomba", "iso_1_code": null, "iso_3_code": "xon", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7561", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Moba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bimoba", "iso_1_code": null, "iso_3_code": "bim", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7563", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Moba", "iso_1_code": null, "iso_3_code": "mfq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7564", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7562", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ntcham", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7562", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ntcham", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Akaselem", "iso_1_code": null, "iso_3_code": "aks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7566", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ntcham", "iso_1_code": null, "iso_3_code": "bud", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7567", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7565", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7556", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Western", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7556", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Western", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Nootre", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Notre", "iso_1_code": null, "iso_3_code": "bly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7570", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7569", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northwest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Farefare", "iso_1_code": null, "iso_3_code": "gur", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7572", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Moore", "iso_1_code": null, "iso_3_code": "mos", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7573", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Safaliba", "iso_1_code": null, "iso_3_code": "saf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7574", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wali", "iso_1_code": null, "iso_3_code": "wlx", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7575", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dagaari-Birifor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Birifor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Birifor, Malba", "iso_1_code": null, "iso_3_code": "bfo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7578", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Birifor, Southern", "iso_1_code": null, "iso_3_code": "biv", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7579", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7577", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Dagaari", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7577", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Dagaari", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dagaare, Southern", "iso_1_code": null, "iso_3_code": "dga", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7581", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dagaari Dioula", "iso_1_code": null, "iso_3_code": "dgd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7582", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dagara, Northern", "iso_1_code": null, "iso_3_code": "dgi", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7583", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7580", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7576", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7571", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southeast", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7571", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southeast", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dagbani", "iso_1_code": null, "iso_3_code": "dag", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7585", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Hanga", "iso_1_code": null, "iso_3_code": "hag", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7586", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kamara", "iso_1_code": null, "iso_3_code": "jmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7587", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kusaal", "iso_1_code": null, "iso_3_code": "kus", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7588", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mampruli", "iso_1_code": null, "iso_3_code": "maw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7589", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kantosi", "iso_1_code": null, "iso_3_code": "xkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7584", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7568", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Yom-Nawdm", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7568", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Yom-Nawdm", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Nawdm", "iso_1_code": null, "iso_3_code": "nmz", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7592", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yom", "iso_1_code": null, "iso_3_code": "pil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7593", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7539", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7539", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dogoso-Khe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dogoso", "iso_1_code": null, "iso_3_code": "dgs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7596", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khe", "iso_1_code": null, "iso_3_code": "kqg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7597", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7595", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dyan", "iso_1_code": null, "iso_3_code": "dya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gan-Dogose", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Dogos\u00e9", "iso_1_code": null, "iso_3_code": "dos", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7601", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kaansa", "iso_1_code": null, "iso_3_code": "gna", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7602", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Khisa", "iso_1_code": null, "iso_3_code": "kqm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7600", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Grusi", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7600", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Grusi", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bago-Kusuntu", "iso_1_code": null, "iso_3_code": "bqg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chala", "iso_1_code": null, "iso_3_code": "cll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lukpa", "iso_1_code": null, "iso_3_code": "dop", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7608", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kabiy\u00e8", "iso_1_code": null, "iso_3_code": "kbp", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7609", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tem", "iso_1_code": null, "iso_3_code": "kdh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7610", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lama", "iso_1_code": null, "iso_3_code": "las", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7611", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Delo", "iso_1_code": null, "iso_3_code": "ntr", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7612", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7605", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Northern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7605", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Northern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kalams\u00e9", "iso_1_code": null, "iso_3_code": "knz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7614", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ly\u00e9l\u00e9", "iso_1_code": null, "iso_3_code": "lee", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7615", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nuni, Southern", "iso_1_code": null, "iso_3_code": "nnw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7616", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nuni, Northern", "iso_1_code": null, "iso_3_code": "nuv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7617", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pana", "iso_1_code": null, "iso_3_code": "pnq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kasem", "iso_1_code": null, "iso_3_code": "xsm", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7619", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7613", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Western", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7613", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Western", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Chakali", "iso_1_code": null, "iso_3_code": "cli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Winy\u00e9", "iso_1_code": null, "iso_3_code": "kst", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7622", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Deg", "iso_1_code": null, "iso_3_code": "mzw", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7623", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Phuie", "iso_1_code": null, "iso_3_code": "pug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7624", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paasaal", "iso_1_code": null, "iso_3_code": "sig", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7625", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sisaala, Tumulung", "iso_1_code": null, "iso_3_code": "sil", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7626", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sissala", "iso_1_code": null, "iso_3_code": "sld", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7627", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sisaala, Western", "iso_1_code": null, "iso_3_code": "ssl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tampulma", "iso_1_code": null, "iso_3_code": "tpm", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7629", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Vagla", "iso_1_code": null, "iso_3_code": "vag", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7630", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7604", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Kirma-Tyurama", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7604", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Kirma-Tyurama", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Cerma", "iso_1_code": null, "iso_3_code": "cme", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7632", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Turka", "iso_1_code": null, "iso_3_code": "tuz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7633", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" } - ], + }, "node_i": "7538", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulango", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kulango, Bondoukou", "iso_1_code": null, "iso_3_code": "kzc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulango, Bouna", "iso_1_code": null, "iso_3_code": "nku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lobi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Lobi", "iso_1_code": null, "iso_3_code": "lob", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7638", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7637", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Senufo", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7637", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Senufo", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Karaboro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Karaboro, Western", "iso_1_code": null, "iso_3_code": "kza", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karaboro, Eastern", "iso_1_code": null, "iso_3_code": "xrb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7642", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kpalaga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "S\u00e9noufo, Palaka", "iso_1_code": null, "iso_3_code": "plr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nafaanra", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nafaanra", "iso_1_code": null, "iso_3_code": "nfr", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7646", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Senari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "S\u00e9noufo, Cebaara", "iso_1_code": null, "iso_3_code": "sef", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "S\u00e9noufo, Senara", "iso_1_code": null, "iso_3_code": "seq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "S\u00e9noufo, Syenara", "iso_1_code": null, "iso_3_code": "shz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7650", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suppire-Mamara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "S\u00e9noufo, Mamara", "iso_1_code": null, "iso_3_code": "myk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7652", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "S\u00e9noufo, Shempire", "iso_1_code": null, "iso_3_code": "seb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "S\u00e9noufo, Nanerig\u00e9", "iso_1_code": null, "iso_3_code": "sen", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "S\u00e9noufo, S\u00ecc\u00ect\u00e9", "iso_1_code": null, "iso_3_code": "sep", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "S\u00e9noufo, Supyire", "iso_1_code": null, "iso_3_code": "spp", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7656", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7651", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Tagwana-Djimini", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7651", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Tagwana-Djimini", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "S\u00e9noufo, Djimini", "iso_1_code": null, "iso_3_code": "dyi", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7658", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "S\u00e9noufo, Nyarafolo", "iso_1_code": null, "iso_3_code": "sev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "S\u00e9noufo, Tagwana", "iso_1_code": null, "iso_3_code": "tgw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Loma", "iso_1_code": null, "iso_3_code": "loi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "T\u00e9\u00e9n", "iso_1_code": null, "iso_3_code": "lor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiefo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ti\u00e9fo", "iso_1_code": null, "iso_3_code": "tiq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tusia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Toussian, Northern", "iso_1_code": null, "iso_3_code": "tsp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toussian, Southern", "iso_1_code": null, "iso_3_code": "wib", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7668", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Viemo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Viemo", "iso_1_code": null, "iso_3_code": "vig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wara-Natioro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Natioro", "iso_1_code": null, "iso_3_code": "nti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paleni", "iso_1_code": null, "iso_3_code": "pnl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wara", "iso_1_code": null, "iso_3_code": "wbf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7535", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5448", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "5321", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Kordofanian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "5321", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Kordofanian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Heiban", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ko", "iso_1_code": null, "iso_3_code": "fuj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warnang", "iso_1_code": null, "iso_3_code": "wrn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West-Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ebang-Logol", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ebang-Laru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Heiban", "iso_1_code": null, "iso_3_code": "hbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7684", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laro", "iso_1_code": null, "iso_3_code": "lro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7683", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logol", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Logol", "iso_1_code": null, "iso_3_code": "lof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7687", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7686", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Utoro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Otoro", "iso_1_code": null, "iso_3_code": "otr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7688", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7682", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rere", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Koalib", "iso_1_code": null, "iso_3_code": "kib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7691", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7690", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shirumba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Shwai", "iso_1_code": null, "iso_3_code": "shw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Moro", "iso_1_code": null, "iso_3_code": "mor", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7695", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tira", "iso_1_code": null, "iso_3_code": "tic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Katla", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Katla", "iso_1_code": null, "iso_3_code": "kcr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tima", "iso_1_code": null, "iso_3_code": "tms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rashad", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tegali", "iso_1_code": null, "iso_3_code": "ras", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tagoi", "iso_1_code": null, "iso_3_code": "tag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7702", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Talodi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Talodi Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jomang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Talodi", "iso_1_code": null, "iso_3_code": "tlo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nding", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nding", "iso_1_code": null, "iso_3_code": "eli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7708", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngile-Dengebu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dagik", "iso_1_code": null, "iso_3_code": "dec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7710", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngile", "iso_1_code": null, "iso_3_code": "jle", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7711", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tocho", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Acheron", "iso_1_code": null, "iso_3_code": "acz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lumun", "iso_1_code": null, "iso_3_code": "lmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7714", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tocho", "iso_1_code": null, "iso_3_code": "taz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7715", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Torona", "iso_1_code": null, "iso_3_code": "tqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7712", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tegem", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lafofa", "iso_1_code": null, "iso_3_code": "laf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7718", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7675", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mande", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7675", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mande", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bissa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bisa", "iso_1_code": null, "iso_3_code": "bib", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7723", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7722", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Busa", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7722", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Busa", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Boko", "iso_1_code": null, "iso_3_code": "bqc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7725", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bis\u00e3", "iso_1_code": null, "iso_3_code": "bqp", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7726", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bokobaru", "iso_1_code": null, "iso_3_code": "bus", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7727", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7724", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kyanga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Shanga", "iso_1_code": null, "iso_3_code": "sho", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kyanga", "iso_1_code": null, "iso_3_code": "tye", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7728", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Samo, Southern", "iso_1_code": null, "iso_3_code": "sbd", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7732", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Samo, Matya", "iso_1_code": null, "iso_3_code": "stj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samo, Maya", "iso_1_code": null, "iso_3_code": "sym", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7721", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southeastern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7721", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southeastern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Gbin", "iso_1_code": null, "iso_3_code": "xgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7736", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guro-Tura", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Guro-Yaoure", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Guro", "iso_1_code": null, "iso_3_code": "goa", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7739", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yaour\u00e9", "iso_1_code": null, "iso_3_code": "yre", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7740", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7738", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Tura-Dan-Mano", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7738", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Tura-Dan-Mano", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Mano", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Maan", "iso_1_code": null, "iso_3_code": "mev", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7743", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7742", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Tura-Dan", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7742", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Tura-Dan", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Dan", "iso_1_code": null, "iso_3_code": "dnj", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7745", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Goo", "iso_1_code": null, "iso_3_code": "gov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7746", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kla-Dan", "iso_1_code": null, "iso_3_code": "lda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toura", "iso_1_code": null, "iso_3_code": "neb", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7748", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7744", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7737", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Nwa-Ben", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7737", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Nwa-Ben", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Ben-Gban", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gban", "iso_1_code": null, "iso_3_code": "ggu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngen", "iso_1_code": null, "iso_3_code": "gnj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Beng", "iso_1_code": null, "iso_3_code": "nhb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wan-Mwan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mwan", "iso_1_code": null, "iso_3_code": "moa", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7755", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wan", "iso_1_code": null, "iso_3_code": "wan", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7720", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Western", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7720", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Western", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Central-Southwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manding-Jogo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Jogo-Jeri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jeri Kuo", "iso_1_code": null, "iso_3_code": "jek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7762", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jeri-Jalkuna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jalkunan", "iso_1_code": null, "iso_3_code": "bxl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jogo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ligbi", "iso_1_code": null, "iso_3_code": "lig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7766", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tonjon", "iso_1_code": null, "iso_3_code": "tjn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7767", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7765", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7761", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manding-Vai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { - "name": "Manding-Mokole", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, + "name": "Manding-Mokole", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Manding", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bolon", "iso_1_code": null, "iso_3_code": "bof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7771", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jahanka", "iso_1_code": null, "iso_3_code": "jad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sininkere", "iso_1_code": null, "iso_3_code": "skq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7773", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manding-East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Marka-Dafin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Marka", "iso_1_code": null, "iso_3_code": "rkm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northeastern Manding", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bamana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bamanankan", "iso_1_code": "bm", "iso_3_code": "bam", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7779", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Jula", "iso_1_code": null, "iso_3_code": "dyu", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7780", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7778", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7777", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southeastern Manding", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7777", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southeastern Manding", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Maninkakan, Eastern", "iso_1_code": null, "iso_3_code": "emk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konyanka", "iso_1_code": null, "iso_3_code": "mku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maninka, Sankaran", "iso_1_code": null, "iso_3_code": "msc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7784", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manya", "iso_1_code": null, "iso_3_code": "mzj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maninka-Mori", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wojenaka", "iso_1_code": null, "iso_3_code": "jod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Worodougou", "iso_1_code": null, "iso_3_code": "jud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7788", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koro", "iso_1_code": null, "iso_3_code": "kfo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koyaga", "iso_1_code": null, "iso_3_code": "kga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mahou", "iso_1_code": null, "iso_3_code": "mxx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7786", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7774", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Manding-West", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7774", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Manding-West", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Xaasongaxango", "iso_1_code": null, "iso_3_code": "kao", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7793", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Maninkakan, Western", "iso_1_code": null, "iso_3_code": "mlq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7794", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandinka", "iso_1_code": null, "iso_3_code": "mnk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7795", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Maninkakan, Kita", "iso_1_code": null, "iso_3_code": "mwk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7796", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kagoro", "iso_1_code": null, "iso_3_code": "xkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7797", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7792", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7770", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mokole", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7770", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mokole", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kakabe", "iso_1_code": null, "iso_3_code": "kke", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuranko", "iso_1_code": null, "iso_3_code": "knk", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7800", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lele", "iso_1_code": null, "iso_3_code": "llc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mogofin", "iso_1_code": null, "iso_3_code": "mfg", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7802", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7769", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Vai-Kono", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7769", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Vai-Kono", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kono", "iso_1_code": null, "iso_3_code": "kno", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7804", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Vai", "iso_1_code": null, "iso_3_code": "vai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7768", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7760", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Susu-Yalunka", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7760", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Susu-Yalunka", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Susu", "iso_1_code": null, "iso_3_code": "sus", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7807", + "native_tokenizers": [], "scripts": [ "Arab", "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yalunka", "iso_1_code": null, "iso_3_code": "yal", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7808", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] + } + ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" } - ], + }, "node_i": "7806", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7759", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southwestern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7759", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southwestern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kpelle", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kpelle, Guinea", "iso_1_code": null, "iso_3_code": "gkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7811", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kono", "iso_1_code": null, "iso_3_code": "knu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kpelle, Liberia", "iso_1_code": null, "iso_3_code": "xpe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7813", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mende-Loma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Zialo", "iso_1_code": null, "iso_3_code": "zil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Loma", "iso_1_code": null, "iso_3_code": "lom", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7817", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Toma", "iso_1_code": null, "iso_3_code": "tod", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7818", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "7816", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mende-Bandi", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7816", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mende-Bandi", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Bandi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bandi", "iso_1_code": null, "iso_3_code": "bza", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mende-Loko", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Loko", "iso_1_code": null, "iso_3_code": "lok", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7823", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mende", "iso_1_code": null, "iso_3_code": "men", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7824", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7822", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7814", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7809", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "7758", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Northwestern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "7758", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Northwestern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kpeego", "iso_1_code": null, "iso_3_code": "cpo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samogo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bankagooma", "iso_1_code": null, "iso_3_code": "bxw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dz\u00f9\u00f9ngoo", "iso_1_code": null, "iso_3_code": "dnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duungooma", "iso_1_code": null, "iso_3_code": "dux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jowulu", "iso_1_code": null, "iso_3_code": "jow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seenku", "iso_1_code": null, "iso_3_code": "sos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7832", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soninke-Bobo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bobo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tn\")", - "original_lang_name": "tswana", - "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Konab\u00e9r\u00e9", "iso_1_code": null, "iso_3_code": "bbo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7835", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bobo Madar\u00e9, Southern", "iso_1_code": null, "iso_3_code": "bwq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tn\")", "original_lang_name": "tswana", "original_lang_code": "tsn", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "7836", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soninke-Boso", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boso", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bozo, Tiemac\u00e8w\u00e8", "iso_1_code": null, "iso_3_code": "boo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bozo, Tieyaxo", "iso_1_code": null, "iso_3_code": "boz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7841", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bozo, Kelengaxo", "iso_1_code": null, "iso_3_code": "bzx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jenaama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bozo, Jenaama", "iso_1_code": null, "iso_3_code": "bze", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7844", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7838", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soninke", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Soninke", "iso_1_code": null, "iso_3_code": "snk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7825", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "7719", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbre", "iso_1_code": null, "iso_3_code": "mka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tn\")", + "original_lang_name": "tswana", + "original_lang_code": "tsn", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "5320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Nilo-Saharan.json b/data/Nilo-Saharan.json index 1f3354bd22bfc8c76a2de70a49b64b802792a4f2..a720aab420156837e1db2415f013f2e734529263 100644 --- a/data/Nilo-Saharan.json +++ b/data/Nilo-Saharan.json @@ -2,3686 +2,3686 @@ "name": "Nilo-Saharan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kuliak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ik", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ik", "iso_1_code": null, "iso_3_code": "ikx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7852", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngangea-So", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyang\u2019i", "iso_1_code": null, "iso_3_code": "nyp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soo", "iso_1_code": null, "iso_3_code": "teu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7855", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7853", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saharan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Berti", "iso_1_code": null, "iso_3_code": "byt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7858", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zaghawa", "iso_1_code": null, "iso_3_code": "zag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7859", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7857", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kanuri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kanuri, Bilma", "iso_1_code": null, "iso_3_code": "bms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanembu", "iso_1_code": null, "iso_3_code": "kbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanuri, Manga", "iso_1_code": "kr", "iso_3_code": "kby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7864", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kanuri, Yerwa", "iso_1_code": "kr", "iso_3_code": "knc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7865", + "native_tokenizers": [], "scripts": [ "Latn", "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Kanuri, Tumari", "iso_1_code": "kr", "iso_3_code": "krt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7866", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarjumo", "iso_1_code": null, "iso_3_code": "txj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7861", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tebu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dazaga", "iso_1_code": null, "iso_3_code": "dzg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tedaga", "iso_1_code": null, "iso_3_code": "tuq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7860", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Satellite-Core", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Core", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "B\u2019aga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Daats\u02bci\u0301in", "iso_1_code": null, "iso_3_code": "dtn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gumuz", "iso_1_code": null, "iso_3_code": "guk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7875", + "native_tokenizers": [], "scripts": [ "Ethi" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "7873", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Sudanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern (k languages)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nara", "iso_1_code": null, "iso_3_code": "nrb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7879", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7878", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nubian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mattokki", "iso_1_code": null, "iso_3_code": "xnz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7882", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Birked", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Birked", "iso_1_code": null, "iso_3_code": "brk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7884", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7883", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dongolawi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Andaandi", "iso_1_code": null, "iso_3_code": "dgl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7886", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7885", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hill", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kadaru-Ghulfan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ghulfan", "iso_1_code": null, "iso_3_code": "ghl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7889", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kadaru", "iso_1_code": null, "iso_3_code": "kdu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7890", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7888", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dilling", "iso_1_code": null, "iso_3_code": "dil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7892", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dair", "iso_1_code": null, "iso_3_code": "drb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7893", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "El Hugeirat", "iso_1_code": null, "iso_3_code": "elh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7894", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karko", "iso_1_code": null, "iso_3_code": "kko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7895", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wali", "iso_1_code": null, "iso_3_code": "wll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7896", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7891", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7887", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7881", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nobiin", "iso_1_code": null, "iso_3_code": "fia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7897", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Midob", "iso_1_code": null, "iso_3_code": "mei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7880", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyimang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Afitti", "iso_1_code": null, "iso_3_code": "aft", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7902", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ama", "iso_1_code": null, "iso_3_code": "nyi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7903", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7901", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mararit", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mararit", "iso_1_code": null, "iso_3_code": "mgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7905", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tama-Sungor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Assangori", "iso_1_code": null, "iso_3_code": "sjg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7908", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tama", "iso_1_code": null, "iso_3_code": "tma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7909", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7904", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7877", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern (n languages)", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Daju", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern Daju", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Logorik", "iso_1_code": null, "iso_3_code": "liu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shatt", "iso_1_code": null, "iso_3_code": "shj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7914", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7912", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Daju", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baygo", "iso_1_code": null, "iso_3_code": "byg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Daju, Dar Fur", "iso_1_code": null, "iso_3_code": "daj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Daju, Dar Sila", "iso_1_code": null, "iso_3_code": "dau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Daju, Dar Daju", "iso_1_code": null, "iso_3_code": "djc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Njalgulgule", "iso_1_code": null, "iso_3_code": "njl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7911", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Jebel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aka-Kelo-Molo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aka", "iso_1_code": null, "iso_3_code": "soh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kelo", "iso_1_code": null, "iso_3_code": "xel", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7924", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Molo", "iso_1_code": null, "iso_3_code": "zmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7925", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gaam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gaam", "iso_1_code": null, "iso_3_code": "tbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7926", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nilotic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bari", "iso_1_code": null, "iso_3_code": "bfa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kakwa", "iso_1_code": null, "iso_3_code": "keo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7932", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mandari", "iso_1_code": null, "iso_3_code": "mqu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7933", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuku", "iso_1_code": null, "iso_3_code": "ukv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7934", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7930", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lotuxo-Teso", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lotuxo-Maa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lotuxo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dongotono", "iso_1_code": null, "iso_3_code": "ddd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7938", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Imotong", "iso_1_code": null, "iso_3_code": "imt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7939", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lango", "iso_1_code": null, "iso_3_code": "lgo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lokoya", "iso_1_code": null, "iso_3_code": "lky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Otuho", "iso_1_code": null, "iso_3_code": "lot", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lopit", "iso_1_code": null, "iso_3_code": "lpx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7943", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Logir", "iso_1_code": null, "iso_3_code": "lqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Okolie", "iso_1_code": null, "iso_3_code": "oie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7945", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7937", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ongamo-Maa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maasai", "iso_1_code": null, "iso_3_code": "mas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7947", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngasa", "iso_1_code": null, "iso_3_code": "nsg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samburu", "iso_1_code": null, "iso_3_code": "saq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7946", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7936", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teso-Turkana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Teso", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ateso", "iso_1_code": null, "iso_3_code": "teo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7952", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "7951", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Turkana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ng\u2019akarimojong", "iso_1_code": null, "iso_3_code": "kdj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7954", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nyangatom", "iso_1_code": null, "iso_3_code": "nnj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7955", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toposa", "iso_1_code": null, "iso_3_code": "toq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7956", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Turkana", "iso_1_code": null, "iso_3_code": "tuv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7957", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "7953", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7950", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7935", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7929", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kalenjin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Elgon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kupsapiiny", "iso_1_code": null, "iso_3_code": "kpz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7961", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sabaot", "iso_1_code": null, "iso_3_code": "spy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7962", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "7960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nandi-Markweta", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kipsigis", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kipsigis", "iso_1_code": null, "iso_3_code": "sgc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7965", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "7964", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Markweta", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Markweeta", "iso_1_code": null, "iso_3_code": "enb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7967", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "7966", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nandi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Keiyo", "iso_1_code": null, "iso_3_code": "eyo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kisankasa", "iso_1_code": null, "iso_3_code": "kqh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nandi", "iso_1_code": null, "iso_3_code": "niq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7971", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Terik", "iso_1_code": null, "iso_3_code": "tec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7972", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tugen", "iso_1_code": null, "iso_3_code": "tuy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7973", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7968", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7963", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Okiek", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Okiek", "iso_1_code": null, "iso_3_code": "oki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7975", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pokot", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "P\u00f6koot", "iso_1_code": null, "iso_3_code": "pko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7977", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7976", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tatoga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Omotik", "iso_1_code": null, "iso_3_code": "omt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7979", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Datooga", "iso_1_code": null, "iso_3_code": "tcc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7980", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "7978", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7958", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dinka-Nuer", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dinka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dinka, South Central", "iso_1_code": null, "iso_3_code": "dib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dinka, Southwestern", "iso_1_code": null, "iso_3_code": "dik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7985", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dinka, Northeastern", "iso_1_code": null, "iso_3_code": "dip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7986", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dinka, Northwestern", "iso_1_code": null, "iso_3_code": "diw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7987", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dinka, Southeastern", "iso_1_code": null, "iso_3_code": "dks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7988", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "7983", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuer", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Reel", "iso_1_code": null, "iso_3_code": "atu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuer", "iso_1_code": null, "iso_3_code": "nus", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7991", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "7989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7982", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anuak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anuak", "iso_1_code": null, "iso_3_code": "anu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7995", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Belanda Bor", "iso_1_code": null, "iso_3_code": "bxb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7997", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7996", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Luwo", "iso_1_code": null, "iso_3_code": "lwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "7999", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "7998", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mabaan-Burun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Burun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Burun", "iso_1_code": null, "iso_3_code": "bdi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8002", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8001", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mabaan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jumjum", "iso_1_code": null, "iso_3_code": "jum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mabaan", "iso_1_code": null, "iso_3_code": "mfz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8005", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8003", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shilluk", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Shilluk", "iso_1_code": null, "iso_3_code": "shk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8007", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8006", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thuri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Thuri", "iso_1_code": null, "iso_3_code": "thu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8008", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "P\u00e4ri", "iso_1_code": null, "iso_3_code": "lkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8011", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7993", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Adhola", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jopadhola", "iso_1_code": null, "iso_3_code": "adh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8014", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8013", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kumam", "iso_1_code": null, "iso_3_code": "kdi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8016", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luo-Acholi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alur-Acholi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Thur", "iso_1_code": null, "iso_3_code": "lth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8019", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alur", "iso_1_code": null, "iso_3_code": "alz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8021", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lango-Acholi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Acholi", "iso_1_code": null, "iso_3_code": "ach", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8023", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lango", "iso_1_code": null, "iso_3_code": "laj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8024", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8022", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dholuo", "iso_1_code": null, "iso_3_code": "luo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8026", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8025", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8012", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7981", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7928", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Surmic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Majang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Majang", "iso_1_code": null, "iso_3_code": "mpe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8030", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8029", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8028", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Southeast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwegu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwegu", "iso_1_code": null, "iso_3_code": "xwg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8034", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8033", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pastoral", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Me\u2019en", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Me\u2019en", "iso_1_code": null, "iso_3_code": "mym", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8037", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mursi", "iso_1_code": null, "iso_3_code": "muz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suri, Tirmaga-Chai", "iso_1_code": null, "iso_3_code": "suq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8040", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Didinga-Murle", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Didinga-Longarim", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Didinga", "iso_1_code": null, "iso_3_code": "did", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8044", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Laarim", "iso_1_code": null, "iso_3_code": "loh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8045", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8043", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murle", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Murle", "iso_1_code": null, "iso_3_code": "mur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8047", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tennet", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tennet", "iso_1_code": null, "iso_3_code": "tex", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8049", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8048", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8042", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kacipo-Balesi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Suri, Kacipo-Bale", "iso_1_code": null, "iso_3_code": "koe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8051", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8041", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8027", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Temein", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tese", "iso_1_code": null, "iso_3_code": "keg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Temein", "iso_1_code": null, "iso_3_code": "teq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8052", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7910", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kadugli-Krongo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kanga", "iso_1_code": null, "iso_3_code": "kcp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8056", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Keiga", "iso_1_code": null, "iso_3_code": "kec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8057", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Krongo", "iso_1_code": null, "iso_3_code": "kgo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8058", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tumtum", "iso_1_code": null, "iso_3_code": "tbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8059", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tulishi", "iso_1_code": null, "iso_3_code": "tey", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Katcha-Kadugli-Miri", "iso_1_code": null, "iso_3_code": "xtc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8061", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gule", "iso_1_code": null, "iso_3_code": "gly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8063", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gwama", "iso_1_code": null, "iso_3_code": "kmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8064", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Opo", "iso_1_code": null, "iso_3_code": "lgn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8065", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uduk", "iso_1_code": null, "iso_3_code": "udu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8066", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Komo", "iso_1_code": null, "iso_3_code": "xom", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8067", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8062", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7872", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Satellites", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Berta", "iso_1_code": null, "iso_3_code": "wti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8069", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Sudanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lendu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ndrulo", "iso_1_code": null, "iso_3_code": "dno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8073", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lendu", "iso_1_code": null, "iso_3_code": "led", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8074", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngiti", "iso_1_code": null, "iso_3_code": "niy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8075", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8072", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangbetu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asoa", "iso_1_code": null, "iso_3_code": "asv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8077", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lombi", "iso_1_code": null, "iso_3_code": "lmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8078", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangbetu", "iso_1_code": null, "iso_3_code": "mdj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8076", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangbutu-Efe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bendi", "iso_1_code": null, "iso_3_code": "bct", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8081", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Efe", "iso_1_code": null, "iso_3_code": "efe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8082", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lese", "iso_1_code": null, "iso_3_code": "les", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8083", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mamvu", "iso_1_code": null, "iso_3_code": "mdi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangbutu", "iso_1_code": null, "iso_3_code": "mdk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8085", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mvuba", "iso_1_code": null, "iso_3_code": "mxh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8086", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kebu", "iso_1_code": null, "iso_3_code": "ndp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8087", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8080", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moru-Madi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Avokaya", "iso_1_code": null, "iso_3_code": "avu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8090", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Keliko", "iso_1_code": null, "iso_3_code": "kbo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8091", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lugbara", "iso_1_code": null, "iso_3_code": "lgg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8092", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Logo", "iso_1_code": null, "iso_3_code": "log", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8093", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Aringa", "iso_1_code": null, "iso_3_code": "luc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8094", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Omi", "iso_1_code": null, "iso_3_code": "omi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8095", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8089", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Moru", "iso_1_code": null, "iso_3_code": "mgd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8097", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Olu\u2019bo", "iso_1_code": null, "iso_3_code": "lul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8099", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ma\u2019di", "iso_1_code": null, "iso_3_code": "mhi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8100", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ma\u2019di, Southern", "iso_1_code": null, "iso_3_code": "snm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8098", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8088", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8071", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bongo-Bagirmi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bongo-Baka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Beli", "iso_1_code": null, "iso_3_code": "blm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baka", "iso_1_code": null, "iso_3_code": "bdh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8107", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bongo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bongo", "iso_1_code": null, "iso_3_code": "bot", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8109", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8108", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "M\u00f6d\u00f6-Nyamusa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jur Modo", "iso_1_code": null, "iso_3_code": "bex", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8111", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nyamusa-Molo", "iso_1_code": null, "iso_3_code": "nwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8112", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Morokodo-Mo\u2019da", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mo\u2019da", "iso_1_code": null, "iso_3_code": "gbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Morokodo", "iso_1_code": null, "iso_3_code": "mgc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8115", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8113", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mittu", "iso_1_code": null, "iso_3_code": "mwu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8116", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fulu", "iso_1_code": null, "iso_3_code": "fuu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gula", "iso_1_code": null, "iso_3_code": "kcm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yulu", "iso_1_code": null, "iso_3_code": "yul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sara-Bagirmi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Birri", "iso_1_code": null, "iso_3_code": "bvq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fongoro", "iso_1_code": null, "iso_3_code": "fgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bagirmi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Morom", "iso_1_code": null, "iso_3_code": "bdo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bagirmi", "iso_1_code": null, "iso_3_code": "bmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Berakou", "iso_1_code": null, "iso_3_code": "bxv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Disa", "iso_1_code": null, "iso_3_code": "dsi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gula", "iso_1_code": null, "iso_3_code": "glu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jaya", "iso_1_code": null, "iso_3_code": "jyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kenga", "iso_1_code": null, "iso_3_code": "kyq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8132", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naba", "iso_1_code": null, "iso_3_code": "mne", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8133", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8125", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sara Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bedjond", "iso_1_code": null, "iso_3_code": "bjv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8136", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dagba", "iso_1_code": null, "iso_3_code": "dgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gor", "iso_1_code": null, "iso_3_code": "gqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8138", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gulay", "iso_1_code": null, "iso_3_code": "gvl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8139", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Horo", "iso_1_code": null, "iso_3_code": "hor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kabba", "iso_1_code": null, "iso_3_code": "ksp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8141", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Laka", "iso_1_code": null, "iso_3_code": "lap", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8142", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mango", "iso_1_code": null, "iso_3_code": "mge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8143", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sar", "iso_1_code": null, "iso_3_code": "mwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8144", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mbay", "iso_1_code": null, "iso_3_code": "myb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8145", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngam", "iso_1_code": null, "iso_3_code": "nmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngambay", "iso_1_code": null, "iso_3_code": "sba", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8147", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sara Kaba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaba D\u00e9m\u00e9, Sara", "iso_1_code": null, "iso_3_code": "kwg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8149", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaba Naa, Sara", "iso_1_code": null, "iso_3_code": "kwv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8150", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulfa", "iso_1_code": null, "iso_3_code": "kxj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sara Kaba", "iso_1_code": null, "iso_3_code": "sbz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8152", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vale", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Luto", "iso_1_code": null, "iso_3_code": "ndy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8154", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Vale", "iso_1_code": null, "iso_3_code": "vae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8153", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sinyar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sinyar", "iso_1_code": null, "iso_3_code": "sys", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8156", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8103", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kresh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aja", "iso_1_code": null, "iso_3_code": "aja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gbaya", "iso_1_code": null, "iso_3_code": "krs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8160", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8102", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8070", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amdang", "iso_1_code": null, "iso_3_code": "amj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fur", "iso_1_code": null, "iso_3_code": "fvr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8161", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kunama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kunama", "iso_1_code": null, "iso_3_code": "kun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8164", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maban", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karanga", "iso_1_code": null, "iso_3_code": "kth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8167", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kendeje", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kendeje", "iso_1_code": null, "iso_3_code": "klf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maba", "iso_1_code": null, "iso_3_code": "mde", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marfa", "iso_1_code": null, "iso_3_code": "mvu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8170", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masalit", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Massalat", "iso_1_code": null, "iso_3_code": "mdg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masalit", "iso_1_code": null, "iso_3_code": "mls", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8175", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8173", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Runga-Kibet", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kibet", "iso_1_code": null, "iso_3_code": "kie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Runga", "iso_1_code": null, "iso_3_code": "rou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8176", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Surbakhal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Surbakhal", "iso_1_code": null, "iso_3_code": "sbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8180", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8179", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8166", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8068", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7871", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Songhai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Korandje", "iso_1_code": null, "iso_3_code": "kcy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8182", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tadaksahak", "iso_1_code": null, "iso_3_code": "dsq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tasawaq", "iso_1_code": null, "iso_3_code": "twq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dendi", "iso_1_code": null, "iso_3_code": "ddn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8187", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zarma", "iso_1_code": null, "iso_3_code": "dje", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8188", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Songhay, Humburi Senni", "iso_1_code": null, "iso_3_code": "hmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Songhay, Koyra Chiini", "iso_1_code": null, "iso_3_code": "khq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8190", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Songhay, Koyraboro Senni", "iso_1_code": null, "iso_3_code": "ses", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8191", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tondi Songway Kiini", "iso_1_code": null, "iso_3_code": "tst", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8186", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "7849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Nimboran.json b/data/Nimboran.json index c8c7878f7a40b9940a66aaa81f2ef9eea8b5a33e..17c08487da5b85ffe06ad98f2ae37acaafadf523 100644 --- a/data/Nimboran.json +++ b/data/Nimboran.json @@ -2,60 +2,60 @@ "name": "Nimboran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gresi", "iso_1_code": null, "iso_3_code": "grs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mlap", "iso_1_code": null, "iso_3_code": "kja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kemtuik", "iso_1_code": null, "iso_3_code": "kmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8196", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mekwei", "iso_1_code": null, "iso_3_code": "msf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nimboran", "iso_1_code": null, "iso_3_code": "nir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8193", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/North Bougainville.json b/data/North Bougainville.json index b0a9c72aaf9c7b8e3a7f2bd39ddf058b92d3d8c9..ba29be4c4f3e118b178bb19b29724ec2b8f3f93d 100644 --- a/data/North Bougainville.json +++ b/data/North Bougainville.json @@ -2,85 +2,85 @@ "name": "North Bougainville", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Keriaka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ramopa", "iso_1_code": null, "iso_3_code": "kjx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rapoisi", "iso_1_code": null, "iso_3_code": "kyx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8203", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8202", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rotokas", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Askopan", "iso_1_code": null, "iso_3_code": "eiv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rotokas", "iso_1_code": null, "iso_3_code": "roo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8206", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Otomanguean.json b/data/Otomanguean.json index a035097fb4bc46e921a8860782011b89535e4f0f..86cd24b64e5290bfe2b78b18ff67b37e980bd96b 100644 --- a/data/Otomanguean.json +++ b/data/Otomanguean.json @@ -2,2262 +2,2262 @@ "name": "Otomanguean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern Otomanguean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amuzgo-Mixtecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amuzgo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amuzgo, Guerrero", "iso_1_code": null, "iso_3_code": "amu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8211", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Amuzgo, San Pedro Amuzgos", "iso_1_code": null, "iso_3_code": "azg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8212", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Amuzgo, Ipalapa", "iso_1_code": null, "iso_3_code": "azm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8213", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cuicatec", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cuicatec, Teutila", "iso_1_code": null, "iso_3_code": "cut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8216", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cuicatec, Tepeuxila", "iso_1_code": null, "iso_3_code": "cux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8217", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mixtec, Western Juxtlahuaca", "iso_1_code": null, "iso_3_code": "jmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8219", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Yutanduchi", "iso_1_code": null, "iso_3_code": "mab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8220", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Amoltepec", "iso_1_code": null, "iso_3_code": "mbz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8221", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Itundujia", "iso_1_code": null, "iso_3_code": "mce", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Santa Luc\u00eda Monteverde", "iso_1_code": null, "iso_3_code": "mdv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Southwestern Tlaxiaco", "iso_1_code": null, "iso_3_code": "meh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8224", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Atatlahuca", "iso_1_code": null, "iso_3_code": "mib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8225", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Ocotepec", "iso_1_code": null, "iso_3_code": "mie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8226", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, San Miguel el Grande", "iso_1_code": null, "iso_3_code": "mig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8227", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Chayuco", "iso_1_code": null, "iso_3_code": "mih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8228", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Chigmecatitl\u00e1n", "iso_1_code": null, "iso_3_code": "mii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8229", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Pe\u00f1oles", "iso_1_code": null, "iso_3_code": "mil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8230", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Alacatlatzala", "iso_1_code": null, "iso_3_code": "mim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8231", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Pinotepa Nacional", "iso_1_code": null, "iso_3_code": "mio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8232", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Apasco-Apoala", "iso_1_code": null, "iso_3_code": "mip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8233", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Southern Puebla", "iso_1_code": null, "iso_3_code": "mit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8234", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Cacaloxtepec", "iso_1_code": null, "iso_3_code": "miu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Mixtepec", "iso_1_code": null, "iso_3_code": "mix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Ayutla", "iso_1_code": null, "iso_3_code": "miy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8237", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Coatzospan", "iso_1_code": null, "iso_3_code": "miz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8238", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, San Juan Colorado", "iso_1_code": null, "iso_3_code": "mjc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8239", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Silacayoapan", "iso_1_code": null, "iso_3_code": "mks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8240", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Yosond\u00faa", "iso_1_code": null, "iso_3_code": "mpm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8241", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Tlazoyaltepec", "iso_1_code": null, "iso_3_code": "mqh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8242", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Tututepec", "iso_1_code": null, "iso_3_code": "mtu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Tida\u00e1", "iso_1_code": null, "iso_3_code": "mtx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Yucua\u00f1e", "iso_1_code": null, "iso_3_code": "mvg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Northwest Oaxaca", "iso_1_code": null, "iso_3_code": "mxa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Tezoatl\u00e1n", "iso_1_code": null, "iso_3_code": "mxb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8247", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Huitepec", "iso_1_code": null, "iso_3_code": "mxs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Jamiltepec", "iso_1_code": null, "iso_3_code": "mxt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8249", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Metlat\u00f3noc", "iso_1_code": null, "iso_3_code": "mxv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8250", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Southeastern Nochixtl\u00e1n", "iso_1_code": null, "iso_3_code": "mxy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Santa Mar\u00eda Zacatepec", "iso_1_code": null, "iso_3_code": "mza", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8252", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Juxtlahuaca", "iso_1_code": null, "iso_3_code": "vmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Ixtayutla", "iso_1_code": null, "iso_3_code": "vmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Mitlatongo", "iso_1_code": null, "iso_3_code": "vmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8255", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Soyaltepec", "iso_1_code": null, "iso_3_code": "vmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Tamazola", "iso_1_code": null, "iso_3_code": "vmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Alcozauca", "iso_1_code": null, "iso_3_code": "xta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Chazumba", "iso_1_code": null, "iso_3_code": "xtb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8259", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Diuxi-Tilantongo", "iso_1_code": null, "iso_3_code": "xtd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8260", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Sinicahua", "iso_1_code": null, "iso_3_code": "xti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, San Juan Teita", "iso_1_code": null, "iso_3_code": "xtj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Tijaltepec", "iso_1_code": null, "iso_3_code": "xtl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8263", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Magdalena Pe\u00f1asco", "iso_1_code": null, "iso_3_code": "xtm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8264", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, Northern Tlaxiaco", "iso_1_code": null, "iso_3_code": "xtn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8265", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mixtec, San Miguel Piedras", "iso_1_code": null, "iso_3_code": "xtp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Sindihui", "iso_1_code": null, "iso_3_code": "xts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Tacahua", "iso_1_code": null, "iso_3_code": "xtt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Cuyamecalco", "iso_1_code": null, "iso_3_code": "xtu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8269", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mixtec, Yolox\u00f3chitl", "iso_1_code": null, "iso_3_code": "xty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Trique", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Triqui, Copala", "iso_1_code": null, "iso_3_code": "trc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8272", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Triqui, San Mart\u00edn Itunyoso", "iso_1_code": null, "iso_3_code": "trq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8273", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Triqui, Chicahuaxtla", "iso_1_code": null, "iso_3_code": "trs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8274", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8271", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Popolocan-Zapotecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Popolocan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chocholtec", "iso_1_code": null, "iso_3_code": "coz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8277", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ixcatec", "iso_1_code": null, "iso_3_code": "ixc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mazatec", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mazatec, San Jer\u00f3nimo Tec\u00f3atl", "iso_1_code": null, "iso_3_code": "maa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8280", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mazatec, Jalapa de D\u00edaz", "iso_1_code": null, "iso_3_code": "maj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8281", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mazatec, Chiquihuitl\u00e1n", "iso_1_code": null, "iso_3_code": "maq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8282", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mazatec, Huautla", "iso_1_code": null, "iso_3_code": "mau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8283", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mazatec, Ixcatl\u00e1n", "iso_1_code": null, "iso_3_code": "mzi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8284", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mazatec, Puebla and Northeastern", "iso_1_code": null, "iso_3_code": "pbm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8285", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mazatec, Soyaltepec", "iso_1_code": null, "iso_3_code": "vmp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mazatec, Ayautla", "iso_1_code": null, "iso_3_code": "vmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8287", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mazatec, Mazatl\u00e1n", "iso_1_code": null, "iso_3_code": "vmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8288", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8279", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Popoloca", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Popoloca, Mezontla", "iso_1_code": null, "iso_3_code": "pbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8290", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Popoloca, Coyotepec", "iso_1_code": null, "iso_3_code": "pbf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8291", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Popoloca, Santa In\u00e9s Ahuatempan", "iso_1_code": null, "iso_3_code": "pca", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Popoloca, San Marcos Tlacoyalco", "iso_1_code": null, "iso_3_code": "pls", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8293", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Popoloca, San Juan Atzingo", "iso_1_code": null, "iso_3_code": "poe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8294", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Popoloca, San Felipe Otlaltepec", "iso_1_code": null, "iso_3_code": "pow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Popoloca, San Lu\u00eds Temalacayuca", "iso_1_code": null, "iso_3_code": "pps", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8296", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chatino", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chatino, Eastern Highland", "iso_1_code": null, "iso_3_code": "cly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8299", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chatino, Tataltepec", "iso_1_code": null, "iso_3_code": "cta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8300", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chatino, Western Highland", "iso_1_code": null, "iso_3_code": "ctp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8301", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chatino, Zacatepec", "iso_1_code": null, "iso_3_code": "ctz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chatino, Nopala", "iso_1_code": null, "iso_3_code": "cya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8303", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chatino, Zenzontepec", "iso_1_code": null, "iso_3_code": "czn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8298", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zapotec, Sierra de Ju\u00e1rez", "iso_1_code": null, "iso_3_code": "zaa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8306", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Western Tlacolula Valley", "iso_1_code": null, "iso_3_code": "zab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8307", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Ocotl\u00e1n", "iso_1_code": null, "iso_3_code": "zac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8308", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Cajonos", "iso_1_code": null, "iso_3_code": "zad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8309", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Yareni", "iso_1_code": null, "iso_3_code": "zae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8310", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Ayoquesco", "iso_1_code": null, "iso_3_code": "zaf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8311", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Isthmus", "iso_1_code": null, "iso_3_code": "zai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8312", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Miahuatl\u00e1n", "iso_1_code": null, "iso_3_code": "zam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8313", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Ozolotepec", "iso_1_code": null, "iso_3_code": "zao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8314", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Alo\u00e1pam", "iso_1_code": null, "iso_3_code": "zaq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8315", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Rinc\u00f3n", "iso_1_code": null, "iso_3_code": "zar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8316", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Santo Domingo Albarradas", "iso_1_code": null, "iso_3_code": "zas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8317", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Tabaa", "iso_1_code": null, "iso_3_code": "zat", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8318", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Yatzachi", "iso_1_code": null, "iso_3_code": "zav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8319", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Mitla", "iso_1_code": null, "iso_3_code": "zaw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8320", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Xadani", "iso_1_code": null, "iso_3_code": "zax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Coatecas Altas", "iso_1_code": null, "iso_3_code": "zca", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8322", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Las Delicias", "iso_1_code": null, "iso_3_code": "zcd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Asunci\u00f3n Mixtepec", "iso_1_code": null, "iso_3_code": "zoo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Lachiguiri", "iso_1_code": null, "iso_3_code": "zpa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8325", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Yautepec", "iso_1_code": null, "iso_3_code": "zpb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Choapan", "iso_1_code": null, "iso_3_code": "zpc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8327", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Southeastern Ixtl\u00e1n", "iso_1_code": null, "iso_3_code": "zpd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Petapa", "iso_1_code": null, "iso_3_code": "zpe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, San Pedro Quiatoni", "iso_1_code": null, "iso_3_code": "zpf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8330", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Guevea de Humboldt", "iso_1_code": null, "iso_3_code": "zpg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8331", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Totomachapan", "iso_1_code": null, "iso_3_code": "zph", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Santa Mar\u00eda Quiegolani", "iso_1_code": null, "iso_3_code": "zpi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8333", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Quiavicuzas", "iso_1_code": null, "iso_3_code": "zpj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8334", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Tlacolulita", "iso_1_code": null, "iso_3_code": "zpk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Lachix\u00edo", "iso_1_code": null, "iso_3_code": "zpl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8336", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Mixtepec", "iso_1_code": null, "iso_3_code": "zpm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8337", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Santa In\u00e9s Yatzechi", "iso_1_code": null, "iso_3_code": "zpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Amatl\u00e1n", "iso_1_code": null, "iso_3_code": "zpo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8339", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, El Alto", "iso_1_code": null, "iso_3_code": "zpp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Zoogocho", "iso_1_code": null, "iso_3_code": "zpq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8341", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Santiago Xanica", "iso_1_code": null, "iso_3_code": "zpr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Coatl\u00e1n", "iso_1_code": null, "iso_3_code": "zps", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, San Vicente Coatl\u00e1n", "iso_1_code": null, "iso_3_code": "zpt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8344", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Yal\u00e1lag", "iso_1_code": null, "iso_3_code": "zpu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8345", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Chichicapan", "iso_1_code": null, "iso_3_code": "zpv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8346", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Zaniza", "iso_1_code": null, "iso_3_code": "zpw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, San Baltazar Loxicha", "iso_1_code": null, "iso_3_code": "zpx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Mazaltepec", "iso_1_code": null, "iso_3_code": "zpy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Texmelucan", "iso_1_code": null, "iso_3_code": "zpz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8350", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Southern Rincon", "iso_1_code": null, "iso_3_code": "zsr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8351", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Elotepec", "iso_1_code": null, "iso_3_code": "zte", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Xanagu\u00eda", "iso_1_code": null, "iso_3_code": "ztg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Lapagu\u00eda-Guivini", "iso_1_code": null, "iso_3_code": "ztl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, San Agust\u00edn Mixtepec", "iso_1_code": null, "iso_3_code": "ztm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Santa Catarina Albarradas", "iso_1_code": null, "iso_3_code": "ztn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Loxicha", "iso_1_code": null, "iso_3_code": "ztp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Quioquitani-Quier\u00ed", "iso_1_code": null, "iso_3_code": "ztq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8358", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zapotec, Tilquiapan", "iso_1_code": null, "iso_3_code": "zts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8359", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Tejalapan", "iso_1_code": null, "iso_3_code": "ztt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, G\u00fcil\u00e1", "iso_1_code": null, "iso_3_code": "ztu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Zaachila", "iso_1_code": null, "iso_3_code": "ztx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zapotec, Yatee", "iso_1_code": null, "iso_3_code": "zty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8363", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8297", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8275", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Otomanguean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oto-Pame-Chinantecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chinantecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chinantec, Comaltepec", "iso_1_code": null, "iso_3_code": "cco", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8367", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chinantec, Ojitl\u00e1n", "iso_1_code": null, "iso_3_code": "chj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8368", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chinantec, Quiotepec", "iso_1_code": null, "iso_3_code": "chq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8369", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chinantec, Ozumac\u00edn", "iso_1_code": null, "iso_3_code": "chz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8370", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chinantec, Lealao", "iso_1_code": null, "iso_3_code": "cle", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8371", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chinantec, Lalana", "iso_1_code": null, "iso_3_code": "cnl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8372", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chinantec, Tepetotutla", "iso_1_code": null, "iso_3_code": "cnt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8373", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chinantec, Palantla", "iso_1_code": null, "iso_3_code": "cpa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8374", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chinantec, Chiltepec", "iso_1_code": null, "iso_3_code": "csa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8375", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinantec, Sochiapam", "iso_1_code": null, "iso_3_code": "cso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8376", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chinantec, Tepinapa", "iso_1_code": null, "iso_3_code": "cte", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8377", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinantec, Tlacoatzintepec", "iso_1_code": null, "iso_3_code": "ctl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8378", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinantec, Usila", "iso_1_code": null, "iso_3_code": "cuc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8379", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chinantec, Valle Nacional", "iso_1_code": null, "iso_3_code": "cvn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oto-Pamean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chichimeco-Jonaz", "iso_1_code": null, "iso_3_code": "pei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matlatzinca-Ocuilteco", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Matlatzinca, San Francisco", "iso_1_code": null, "iso_3_code": "mat", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8384", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matlatzinca, Atzingo", "iso_1_code": null, "iso_3_code": "ocu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8385", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8383", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mazahua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mazahua, Central", "iso_1_code": null, "iso_3_code": "maz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8387", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mazahua, Michoac\u00e1n", "iso_1_code": null, "iso_3_code": "mmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Otomi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Otomi, Mezquital", "iso_1_code": null, "iso_3_code": "ote", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8390", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Otomi, Tilapa", "iso_1_code": null, "iso_3_code": "otl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Otomi, Eastern Highland", "iso_1_code": null, "iso_3_code": "otm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8392", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Otomi, Tenango", "iso_1_code": null, "iso_3_code": "otn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8393", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Otomi, Quer\u00e9taro", "iso_1_code": null, "iso_3_code": "otq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8394", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Otom\u00ed, Estado de M\u00e9xico", "iso_1_code": null, "iso_3_code": "ots", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8395", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Otomi, Temoaya", "iso_1_code": null, "iso_3_code": "ott", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8396", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Otomi, Texcatepec", "iso_1_code": null, "iso_3_code": "otx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8397", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Otomi, Ixtenco", "iso_1_code": null, "iso_3_code": "otz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8389", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pame", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pame, Central", "iso_1_code": null, "iso_3_code": "pbs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8400", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pame, Northern", "iso_1_code": null, "iso_3_code": "pmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8401", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pame, Southern", "iso_1_code": null, "iso_3_code": "pmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tlapanec-Manguean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Manguean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chiapanec", "iso_1_code": null, "iso_3_code": "cip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8404", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tlapanec-Subtiaba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Subtiaba", "iso_1_code": null, "iso_3_code": "sut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tlapanec", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Me\u2019phaa, Malinaltepec", "iso_1_code": null, "iso_3_code": "tcf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8409", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Me\u2019phaa, Azoy\u00fa", "iso_1_code": null, "iso_3_code": "tpc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Me\u2019phaa, Tlacoapa", "iso_1_code": null, "iso_3_code": "tpl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8411", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Me\u2019phaa, Acatepec", "iso_1_code": null, "iso_3_code": "tpx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8412", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8408", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Paezan.json b/data/Paezan.json index 6cb1b0f34234ffc35ca6c16b47bce5b93c3bd1be..c23c135f39032acd4506acae1cb95feea88b077e 100644 --- a/data/Paezan.json +++ b/data/Paezan.json @@ -2,96 +2,96 @@ "name": "Paezan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Andaqui", "iso_1_code": null, "iso_3_code": "ana", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8414", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Coconuco", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anserma", "iso_1_code": null, "iso_3_code": "ans", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8416", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Caramanta", "iso_1_code": null, "iso_3_code": "crf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Misak", "iso_1_code": null, "iso_3_code": "gum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8418", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Totoro", "iso_1_code": null, "iso_3_code": "ttk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8415", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paezan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nasa", "iso_1_code": null, "iso_3_code": "pbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8421", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Palaihnihan.json b/data/Palaihnihan.json index dc2a12360cf7dcaa1a5bfebd256fe58a763a7a16..82c76bdbc3764bbbbfc93cf0bab3133d3744481b 100644 --- a/data/Palaihnihan.json +++ b/data/Palaihnihan.json @@ -2,30 +2,30 @@ "name": "Palaihnihan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Achumawi", "iso_1_code": null, "iso_3_code": "acv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Atsugewi", "iso_1_code": null, "iso_3_code": "atw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8422", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Panoan.json b/data/Panoan.json index db4e645c244b73cd085844c8de9d2e449af6a876..12912d021e54a96d5e7b283a7fc208e4c021265b 100644 --- a/data/Panoan.json +++ b/data/Panoan.json @@ -2,406 +2,406 @@ "name": "Panoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaxarar\u00ed", "iso_1_code": null, "iso_3_code": "ktx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pisabo", "iso_1_code": null, "iso_3_code": "pig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sensi", "iso_1_code": null, "iso_3_code": "sni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulina Pano", "iso_1_code": null, "iso_3_code": "xpk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bolivian Panoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pacahuara", "iso_1_code": null, "iso_3_code": "pcp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8431", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ch\u00e1kobo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ch\u00e1cobo", "iso_1_code": null, "iso_3_code": "cao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8433", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8430", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mainline", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Poyan\u00e1wa", "iso_1_code": null, "iso_3_code": "pyn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuxin\u00e1wa", "iso_1_code": null, "iso_3_code": "tux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cashibo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kakataibo-Kashibo", "iso_1_code": null, "iso_3_code": "cbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8438", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pano", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Panobo", "iso_1_code": null, "iso_3_code": "pno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8439", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shipibo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Capanahua", "iso_1_code": null, "iso_3_code": "kaq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8442", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Katuk\u00edna, Panoan", "iso_1_code": null, "iso_3_code": "knt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marubo", "iso_1_code": null, "iso_3_code": "mzr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Remo", "iso_1_code": null, "iso_3_code": "rem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8445", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shipibo-Conibo", "iso_1_code": null, "iso_3_code": "shp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8446", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tri-State", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kashinawa", "iso_1_code": null, "iso_3_code": "cbs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8448", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sharanahua", "iso_1_code": null, "iso_3_code": "mcd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8449", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yaminahua", "iso_1_code": null, "iso_3_code": "yaa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8450", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yawanawa", "iso_1_code": null, "iso_3_code": "ywn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amawaka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amahuaca", "iso_1_code": null, "iso_3_code": "amc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8453", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Isconahua", "iso_1_code": null, "iso_3_code": "isc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Atsahuaca", "iso_1_code": null, "iso_3_code": "atc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yora", "iso_1_code": null, "iso_3_code": "mts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8457", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nukuini", "iso_1_code": null, "iso_3_code": "nuc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mayoruna-Mats\u00e9s", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Matses", "iso_1_code": null, "iso_3_code": "mcf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8460", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mat\u00eds", "iso_1_code": null, "iso_3_code": "mpq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8461", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korubo", "iso_1_code": null, "iso_3_code": "xor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Pauwasi.json b/data/Pauwasi.json index 400a507e298d54258d407db30a88dfffd99aa299..f299e54d4074206ed45717ebadf76e9ff8bcc562 100644 --- a/data/Pauwasi.json +++ b/data/Pauwasi.json @@ -2,84 +2,84 @@ "name": "Pauwasi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Emem", "iso_1_code": null, "iso_3_code": "enr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zorop", "iso_1_code": null, "iso_3_code": "wfg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karkar-Yuri", "iso_1_code": null, "iso_3_code": "yuj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8467", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tebi", "iso_1_code": null, "iso_3_code": "dmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8469", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Towei", "iso_1_code": null, "iso_3_code": "ttn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Piawi.json b/data/Piawi.json index 8283e796b21e518ebbe8a106d12d0006915d9f82..2e0a4dadb646c26a6c2c2db76d90285ff329c2cf 100644 --- a/data/Piawi.json +++ b/data/Piawi.json @@ -2,32 +2,32 @@ "name": "Piawi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pinai-Hagahai", "iso_1_code": null, "iso_3_code": "pnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8472", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haruai", "iso_1_code": null, "iso_3_code": "tmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8473", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8471", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Pidgin.json b/data/Pidgin.json index f262ab36e9d6fc243029cac3df895140bd70cdfd..57ae7b2b345451d719e4a6dc499a5c9c6248410c 100644 --- a/data/Pidgin.json +++ b/data/Pidgin.json @@ -2,336 +2,336 @@ "name": "Pidgin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mobilian", "iso_1_code": null, "iso_3_code": "mod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndyuka-Trio Pidgin", "iso_1_code": null, "iso_3_code": "njt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amerindian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chinook Wawa", "iso_1_code": null, "iso_3_code": "chn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8478", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Delaware, Pidgin", "iso_1_code": null, "iso_3_code": "dep", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8477", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Assamese based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nefamese", "iso_1_code": null, "iso_3_code": "nef", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "English based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Atlantic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Liberian English", "iso_1_code": null, "iso_3_code": "lir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8483", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pacific", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chinese Pidgin English", "iso_1_code": null, "iso_3_code": "cpi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8486", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "French based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tay Boi", "iso_1_code": null, "iso_3_code": "tas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8487", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hausa based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barikanchi", "iso_1_code": null, "iso_3_code": "bxo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8490", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gibanawa", "iso_1_code": null, "iso_3_code": "gib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iha based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Iha Based Pidgin", "iso_1_code": null, "iso_3_code": "ihb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Broome Pearling Lugger Pidgin", "iso_1_code": null, "iso_3_code": "bpl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8495", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8494", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Motu based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Motu, Hiri", "iso_1_code": "ho", "iso_3_code": "hmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8497", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8496", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Onin based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Onin Based Pidgin", "iso_1_code": null, "iso_3_code": "onx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8499", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8498", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Romance based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lingua Franca", "iso_1_code": null, "iso_3_code": "pml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8501", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8500", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Swahili based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Settla", "iso_1_code": null, "iso_3_code": "sta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zulu based", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pidgin Bantu", "iso_1_code": null, "iso_3_code": "fng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8504", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8474", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Pomoan.json b/data/Pomoan.json index 25a896edb9015db8423569f05072ad9e394888a8..5d3e3b9450204c5855f581736c7d9772bae21cc8 100644 --- a/data/Pomoan.json +++ b/data/Pomoan.json @@ -2,102 +2,102 @@ "name": "Pomoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pomo, Eastern", "iso_1_code": null, "iso_3_code": "peb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pomo, Northeastern", "iso_1_code": null, "iso_3_code": "pef", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pomo, Southeastern", "iso_1_code": null, "iso_3_code": "pom", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pomo, Northern", "iso_1_code": null, "iso_3_code": "pej", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kashaya", "iso_1_code": null, "iso_3_code": "kju", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pomo, Southern", "iso_1_code": null, "iso_3_code": "peq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pomo, Central", "iso_1_code": null, "iso_3_code": "poo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Puinavean.json b/data/Puinavean.json index c1e91d56773a68c2112475fb2ee68dfe5d6fda31..69088b63fe56700fe31e0677073d0fa8e115397b 100644 --- a/data/Puinavean.json +++ b/data/Puinavean.json @@ -2,119 +2,119 @@ "name": "Puinavean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "D\u00e2w", "iso_1_code": null, "iso_3_code": "kwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puinave", "iso_1_code": null, "iso_3_code": "pui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8518", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cacua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cacua", "iso_1_code": null, "iso_3_code": "cbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8520", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nukak Mak\u00fa", "iso_1_code": null, "iso_3_code": "mbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8519", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hupda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hupd\u00eb", "iso_1_code": null, "iso_3_code": "jup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8523", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yuhup", "iso_1_code": null, "iso_3_code": "yab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaburi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nad\u00ebb", "iso_1_code": null, "iso_3_code": "mbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8526", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Purian.json b/data/Purian.json index fa58b6c26b53db8fd6f41ed57fc8e6bb0a06ed9e..7146b2c89624b3e33661d43efca89d9f9073ec09 100644 --- a/data/Purian.json +++ b/data/Purian.json @@ -2,30 +2,30 @@ "name": "Purian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Puri", "iso_1_code": null, "iso_3_code": "prr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korop\u00f3", "iso_1_code": null, "iso_3_code": "xxr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8529", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Quechuan.json b/data/Quechuan.json index 160515e93334d95dbe605d1c98cf97be96e3cb8b..8a5e36aa996858141cd9e5080bbfe4800f72c6ac 100644 --- a/data/Quechuan.json +++ b/data/Quechuan.json @@ -2,660 +2,660 @@ "name": "Quechuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central Quechua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quechua, Pacaraos", "iso_1_code": "qu", "iso_3_code": "qvp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ap-am-ah", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quechua, Huallaga", "iso_1_code": "qu", "iso_3_code": "qub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8534", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Ambo-Pasco", "iso_1_code": "qu", "iso_3_code": "qva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8535", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Panao", "iso_1_code": "qu", "iso_3_code": "qxh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8536", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Alto Mara\u00f1\u00f3n", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quechua, Huamal\u00edes-Dos de Mayo", "iso_1_code": "qu", "iso_3_code": "qvh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8538", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Margos-Yarowilca-Lauricocha", "iso_1_code": "qu", "iso_3_code": "qvm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8539", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8537", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alto Pativilca", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quechua, Cajatambo North Lima", "iso_1_code": "qu", "iso_3_code": "qvl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quechua, Chiqui\u00e1n", "iso_1_code": "qu", "iso_3_code": "qxa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8542", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wankay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quechua, Chaupihuaranga", "iso_1_code": "qu", "iso_3_code": "qur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8544", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quechua, Yauyos", "iso_1_code": "qu", "iso_3_code": "qux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quechua, North Jun\u00edn", "iso_1_code": "qu", "iso_3_code": "qvn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8546", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Huaylla Wanca", "iso_1_code": "qu", "iso_3_code": "qvw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8547", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Chincha", "iso_1_code": "qu", "iso_3_code": "qxc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8548", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quechua, Santa Ana de Tusi Pasco", "iso_1_code": "qu", "iso_3_code": "qxt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quechua, Jauja Wanca", "iso_1_code": "qu", "iso_3_code": "qxw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8550", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waylay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quechua, Corongo Ancash", "iso_1_code": "qu", "iso_3_code": "qwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quechua, Huaylas Ancash", "iso_1_code": "qu", "iso_3_code": "qwh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8553", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Sihuas Ancash", "iso_1_code": "qu", "iso_3_code": "qws", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Conchucos", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quechua, Northern Conchucos", "iso_1_code": "qu", "iso_3_code": "qxn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8556", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Southern Conchucos", "iso_1_code": "qu", "iso_3_code": "qxo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8557", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8551", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Peripheral Quechua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chinchay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern Chinchay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Inga", "iso_1_code": null, "iso_3_code": "inb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8561", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Inga, Jungle", "iso_1_code": null, "iso_3_code": "inj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quichua, Calder\u00f3n Highland", "iso_1_code": "qu", "iso_3_code": "qud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quichua, Chimborazo Highland", "iso_1_code": "qu", "iso_3_code": "qug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8564", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Chachapoyas", "iso_1_code": "qu", "iso_3_code": "quk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8565", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quechua, Southern Pastaza", "iso_1_code": "qu", "iso_3_code": "qup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8566", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quichua, Tena Lowland", "iso_1_code": "qu", "iso_3_code": "quw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8567", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quichua, Imbabura Highland", "iso_1_code": "qu", "iso_3_code": "qvi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8568", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quichua, Loja Highland", "iso_1_code": "qu", "iso_3_code": "qvj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8569", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quichua, Napo", "iso_1_code": "qu", "iso_3_code": "qvo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8570", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, San Mart\u00edn", "iso_1_code": "qu", "iso_3_code": "qvs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8571", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quichua, Northern Pastaza", "iso_1_code": "qu", "iso_3_code": "qvz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8572", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quichua, Salasaca Highland", "iso_1_code": "qu", "iso_3_code": "qxl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8573", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quichua, Ca\u00f1ar Highland", "iso_1_code": "qu", "iso_3_code": "qxr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8574", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8560", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Chinchay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quechua, South Bolivian", "iso_1_code": "qu", "iso_3_code": "quh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8576", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, North Bolivian", "iso_1_code": "qu", "iso_3_code": "qul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8577", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quichua, Santiago del Estero", "iso_1_code": "qu", "iso_3_code": "qus", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8578", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Southern Peruvian Quechua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quechua, Ayacucho", "iso_1_code": "qu", "iso_3_code": "quy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8580", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Cusco", "iso_1_code": "qu", "iso_3_code": "quz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8581", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Eastern Apur\u00edmac", "iso_1_code": "qu", "iso_3_code": "qve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8582", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Puno", "iso_1_code": "qu", "iso_3_code": "qxp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8583", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quechua, Arequipa-La Uni\u00f3n", "iso_1_code": "qu", "iso_3_code": "qxu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8584", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8559", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yungay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Quechua, Lambayeque", "iso_1_code": "qu", "iso_3_code": "quf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8587", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Quechua, Cajamarca", "iso_1_code": "qu", "iso_3_code": "qvc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8588", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8586", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8585", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8558", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Ramu-Lower Sepik.json b/data/Ramu-Lower Sepik.json index b115f56f85a2bc34e61f96dbdb98afdb5b9ca844..8688a689ddf04f9c351ff30fdb3e64a99de56ae2 100644 --- a/data/Ramu-Lower Sepik.json +++ b/data/Ramu-Lower Sepik.json @@ -2,514 +2,514 @@ "name": "Ramu-Lower Sepik", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kambot", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ap Ma", "iso_1_code": null, "iso_3_code": "kbx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lower Sepik", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Angoram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Angoram", "iso_1_code": null, "iso_3_code": "aog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8593", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chambri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chambri", "iso_1_code": null, "iso_3_code": "can", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8596", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8595", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karawari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karawari", "iso_1_code": null, "iso_3_code": "tzx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yimas", "iso_1_code": null, "iso_3_code": "yee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8597", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Murik", "iso_1_code": null, "iso_3_code": "mtf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8601", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kopar", "iso_1_code": null, "iso_3_code": "xop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8602", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ramu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Grass", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abu", "iso_1_code": null, "iso_3_code": "ado", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8605", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambakich", "iso_1_code": null, "iso_3_code": "aew", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waran", "iso_1_code": null, "iso_3_code": "byz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gorovu", "iso_1_code": null, "iso_3_code": "grq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8608", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Middle Ramu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aiome", "iso_1_code": null, "iso_3_code": "aki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8610", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Anor", "iso_1_code": null, "iso_3_code": "anj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8611", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rao", "iso_1_code": null, "iso_3_code": "rao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mikarew", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kire", "iso_1_code": null, "iso_3_code": "geb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8614", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Aruamu", "iso_1_code": null, "iso_3_code": "msy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8615", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Akukem", "iso_1_code": null, "iso_3_code": "spm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8616", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8613", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ottilien", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Borei", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mbore", "iso_1_code": null, "iso_3_code": "gai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8619", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bosmun-Awar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awar", "iso_1_code": null, "iso_3_code": "aya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bosmun", "iso_1_code": null, "iso_3_code": "bqs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8622", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Watam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayan", "iso_1_code": null, "iso_3_code": "kct", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8624", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marangis", "iso_1_code": null, "iso_3_code": "wax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8623", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8617", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamolan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chini", "iso_1_code": null, "iso_3_code": "afi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8627", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Breri", "iso_1_code": null, "iso_3_code": "brq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Igana", "iso_1_code": null, "iso_3_code": "igg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8629", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inapang", "iso_1_code": null, "iso_3_code": "mzu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Romkun", "iso_1_code": null, "iso_3_code": "rmk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kominimung", "iso_1_code": null, "iso_3_code": "xoi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanggu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Andarum", "iso_1_code": null, "iso_3_code": "aod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanggape", "iso_1_code": null, "iso_3_code": "igm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanguat", "iso_1_code": null, "iso_3_code": "tbs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanggu", "iso_1_code": null, "iso_3_code": "tgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8637", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8633", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8589", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Sahaptian.json b/data/Sahaptian.json index 54eb8213907a01ec731cc4a205ac7d6523823480..684d06e055d38825a0ef4d02028bb465c2bcd35a 100644 --- a/data/Sahaptian.json +++ b/data/Sahaptian.json @@ -2,71 +2,71 @@ "name": "Sahaptian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nez Perce", "iso_1_code": null, "iso_3_code": "nez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sahaptin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tenino", "iso_1_code": null, "iso_3_code": "tqn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umatilla", "iso_1_code": null, "iso_3_code": "uma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8642", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Walla Walla", "iso_1_code": null, "iso_3_code": "waa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yakama", "iso_1_code": null, "iso_3_code": "yak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8638", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Salish.json b/data/Salish.json index cdb40297dbed6f5c97a9ee505f17ebf811406c31..12c6cbc8fd42aa2dc2bdb60df838cfab90916cc3 100644 --- a/data/Salish.json +++ b/data/Salish.json @@ -2,379 +2,379 @@ "name": "Salish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bella Coola", "iso_1_code": null, "iso_3_code": "blc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8646", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tillamook", "iso_1_code": null, "iso_3_code": "til", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Salish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Clallam", "iso_1_code": null, "iso_3_code": "clm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Comox", "iso_1_code": null, "iso_3_code": "coo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8650", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Halkomelem", "iso_1_code": null, "iso_3_code": "hur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nooksack", "iso_1_code": null, "iso_3_code": "nok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8652", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pentlatch", "iso_1_code": null, "iso_3_code": "ptw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sechelt", "iso_1_code": null, "iso_3_code": "sec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Squamish", "iso_1_code": null, "iso_3_code": "squ", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Straits Salish", "iso_1_code": null, "iso_3_code": "str", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8656", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Twana", "iso_1_code": null, "iso_3_code": "twa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lushootseed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lushootseed", "iso_1_code": null, "iso_3_code": "lut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Skagit", "iso_1_code": null, "iso_3_code": "ska", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Lushootseed", "iso_1_code": null, "iso_3_code": "slh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Snohomish", "iso_1_code": null, "iso_3_code": "sno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Interior", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lillooet", "iso_1_code": null, "iso_3_code": "lil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shuswap", "iso_1_code": null, "iso_3_code": "shs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Thompson", "iso_1_code": null, "iso_3_code": "thp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Columbia-Wenatchi", "iso_1_code": null, "iso_3_code": "col", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Coeur d\u2019Alene", "iso_1_code": null, "iso_3_code": "crd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Okanagan", "iso_1_code": null, "iso_3_code": "oka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalispel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kalispel-Pend d\u2019Oreille", "iso_1_code": null, "iso_3_code": "fla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Spokane", "iso_1_code": null, "iso_3_code": "spo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsamosan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Inland", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chehalis, Upper", "iso_1_code": null, "iso_3_code": "cjh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cowlitz", "iso_1_code": null, "iso_3_code": "cow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maritime", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chehalis, Lower", "iso_1_code": null, "iso_3_code": "cea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quinault", "iso_1_code": null, "iso_3_code": "qun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Senagi.json b/data/Senagi.json index c5bc3b702a172b5134604da8456636a5816b4e0b..c1672545f8686a54a0fe78aaa2680ac8ef0a3147 100644 --- a/data/Senagi.json +++ b/data/Senagi.json @@ -2,32 +2,32 @@ "name": "Senagi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Angor", "iso_1_code": null, "iso_3_code": "agg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8683", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dla", "iso_1_code": null, "iso_3_code": "kbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8684", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8682", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Sepik.json b/data/Sepik.json index a606f70752ca6588a24276c5339f2118ee571736..ab884d0b5f5842a9a7b595c36e871423af3778e5 100644 --- a/data/Sepik.json +++ b/data/Sepik.json @@ -2,737 +2,737 @@ "name": "Sepik", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abau", "iso_1_code": null, "iso_3_code": "aau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8687", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8686", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iwam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amal", "iso_1_code": null, "iso_3_code": "aad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iwam", "iso_1_code": null, "iso_3_code": "iwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8690", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iwam, Sepik", "iso_1_code": null, "iso_3_code": "iws", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8691", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8688", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Leonhard Schultze", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pefiyahe", "iso_1_code": null, "iso_3_code": "ppq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuwari", "iso_1_code": null, "iso_3_code": "tww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Walio", "iso_1_code": null, "iso_3_code": "wla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yawiyo", "iso_1_code": null, "iso_3_code": "ybx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ambulas", "iso_1_code": null, "iso_3_code": "abt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8698", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Burui", "iso_1_code": null, "iso_3_code": "bry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Boikin", "iso_1_code": null, "iso_3_code": "bzf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gaikundi", "iso_1_code": null, "iso_3_code": "gbf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iatmul", "iso_1_code": null, "iso_3_code": "ian", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8702", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mevembet", "iso_1_code": null, "iso_3_code": "keh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koiwat", "iso_1_code": null, "iso_3_code": "kxt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manambu", "iso_1_code": null, "iso_3_code": "mle", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gala", "iso_1_code": null, "iso_3_code": "nud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sos Kundi", "iso_1_code": null, "iso_3_code": "sdk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sengo", "iso_1_code": null, "iso_3_code": "spk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8708", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hanga Hundi", "iso_1_code": null, "iso_3_code": "wos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8709", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yalaku", "iso_1_code": null, "iso_3_code": "ylg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8710", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nukuma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kwoma", "iso_1_code": null, "iso_3_code": "kmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8712", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kwanga", "iso_1_code": null, "iso_3_code": "kwj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8713", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mende", "iso_1_code": null, "iso_3_code": "sim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8714", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8711", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pouye", "iso_1_code": null, "iso_3_code": "bye", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awtuw", "iso_1_code": null, "iso_3_code": "kmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karawa", "iso_1_code": null, "iso_3_code": "xrw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8718", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8715", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sepik Hill", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alamblak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alamblak", "iso_1_code": null, "iso_3_code": "amp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8721", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kaningra", "iso_1_code": null, "iso_3_code": "knr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8722", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8720", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bahinemo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Berinomo", "iso_1_code": null, "iso_3_code": "bit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8724", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bahinemo", "iso_1_code": null, "iso_3_code": "bjh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8725", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bisis", "iso_1_code": null, "iso_3_code": "bnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8726", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kapriman", "iso_1_code": null, "iso_3_code": "dju", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mari", "iso_1_code": null, "iso_3_code": "mbx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8728", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sumariup", "iso_1_code": null, "iso_3_code": "siv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Watakataui", "iso_1_code": null, "iso_3_code": "wtk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8723", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baiyamo", "iso_1_code": null, "iso_3_code": "ppe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Asaba", "iso_1_code": null, "iso_3_code": "seo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanio", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Niksek", "iso_1_code": null, "iso_3_code": "gbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hewa", "iso_1_code": null, "iso_3_code": "ham", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8736", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piame", "iso_1_code": null, "iso_3_code": "pin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8737", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saniyo-Hiyewe", "iso_1_code": null, "iso_3_code": "sny", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8738", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8719", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ayi", "iso_1_code": null, "iso_3_code": "ayq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pahi", "iso_1_code": null, "iso_3_code": "lgt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mehek", "iso_1_code": null, "iso_3_code": "nux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pasi", "iso_1_code": null, "iso_3_code": "psq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8743", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yessan-Mayo", "iso_1_code": null, "iso_3_code": "yss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8744", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kalou", "iso_1_code": null, "iso_3_code": "ywa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wogamusin-Chenapian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chenapian", "iso_1_code": null, "iso_3_code": "cjn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wogamusin", "iso_1_code": null, "iso_3_code": "wog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8746", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yellow River", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ak", "iso_1_code": null, "iso_3_code": "akq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awun", "iso_1_code": null, "iso_3_code": "aww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Namia", "iso_1_code": null, "iso_3_code": "nnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yerakai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yerakai", "iso_1_code": null, "iso_3_code": "yra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Sign language.json b/data/Sign language.json index 2471696652cd1474aa7b7a0c7286ac8d6a26e6b4..19fbc0ce3c200cb019f2d1be8abc79a2b58ba9df 100644 --- a/data/Sign language.json +++ b/data/Sign language.json @@ -2,1632 +2,1632 @@ "name": "Sign language", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "International Sign", "iso_1_code": null, "iso_3_code": "ils", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Deaf community sign language", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Argentine Sign Language", "iso_1_code": null, "iso_3_code": "aed", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8758", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Armenian Sign Language", "iso_1_code": null, "iso_3_code": "aen", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8759", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Afghan Sign Language", "iso_1_code": null, "iso_3_code": "afg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8760", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "American Sign Language", "iso_1_code": null, "iso_3_code": "ase", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8761", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Auslan", "iso_1_code": null, "iso_3_code": "asf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8762", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Algerian Sign Language", "iso_1_code": null, "iso_3_code": "asp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Austrian Sign Language", "iso_1_code": null, "iso_3_code": "asq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "British Sign Language", "iso_1_code": null, "iso_3_code": "bfi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8765", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malian Sign Language", "iso_1_code": null, "iso_3_code": "bog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8766", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bulgarian Sign Language", "iso_1_code": null, "iso_3_code": "bqn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8767", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bolivian Sign Language", "iso_1_code": null, "iso_3_code": "bvl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8768", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Brazilian Sign Language", "iso_1_code": null, "iso_3_code": "bzs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chadian Sign Language", "iso_1_code": null, "iso_3_code": "cds", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8770", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Catalan Sign Language", "iso_1_code": null, "iso_3_code": "csc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8771", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chiangmai Sign Language", "iso_1_code": null, "iso_3_code": "csd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Czech Sign Language", "iso_1_code": null, "iso_3_code": "cse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8773", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cuban Sign Language", "iso_1_code": null, "iso_3_code": "csf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8774", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chilean Sign Language", "iso_1_code": null, "iso_3_code": "csg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese Sign Language", "iso_1_code": null, "iso_3_code": "csl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Colombian Sign Language", "iso_1_code": null, "iso_3_code": "csn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8777", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Croatian Sign Language", "iso_1_code": null, "iso_3_code": "csq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8778", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Costa Rican Sign Language", "iso_1_code": null, "iso_3_code": "csr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8779", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cambodian Sign Language", "iso_1_code": null, "iso_3_code": "csx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8780", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dominican Sign Language", "iso_1_code": null, "iso_3_code": "doq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sign Language of the Netherlands", "iso_1_code": null, "iso_3_code": "dse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Danish Sign Language", "iso_1_code": null, "iso_3_code": "dsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ecuadorian Sign Language", "iso_1_code": null, "iso_3_code": "ecs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8784", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Egyptian Sign Language", "iso_1_code": null, "iso_3_code": "esl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Salvadoran Sign Language", "iso_1_code": null, "iso_3_code": "esn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8786", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Estonian Sign Language", "iso_1_code": null, "iso_3_code": "eso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ethiopian Sign Language", "iso_1_code": null, "iso_3_code": "eth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8788", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quebec Sign Language", "iso_1_code": null, "iso_3_code": "fcs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Finnish Sign Language", "iso_1_code": null, "iso_3_code": "fse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "French Sign Language", "iso_1_code": null, "iso_3_code": "fsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Finland-Swedish Sign Language", "iso_1_code": null, "iso_3_code": "fss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8792", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ghanaian Sign Language", "iso_1_code": null, "iso_3_code": "gse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8793", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "German Sign Language", "iso_1_code": null, "iso_3_code": "gsg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8794", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guatemalan Sign Language", "iso_1_code": null, "iso_3_code": "gsm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Greek Sign Language", "iso_1_code": null, "iso_3_code": "gss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8796", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guinean Sign Language", "iso_1_code": null, "iso_3_code": "gus", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8797", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hanoi Sign Language", "iso_1_code": null, "iso_3_code": "hab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haiphong Sign Language", "iso_1_code": null, "iso_3_code": "haf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Honduran Sign Language", "iso_1_code": null, "iso_3_code": "hds", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8800", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hong Kong Sign Language", "iso_1_code": null, "iso_3_code": "hks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ho Chi Minh City Sign Language", "iso_1_code": null, "iso_3_code": "hos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hawai\u2018i Sign Language", "iso_1_code": null, "iso_3_code": "hps", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hungarian Sign Language", "iso_1_code": null, "iso_3_code": "hsh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8804", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hausa Sign Language", "iso_1_code": null, "iso_3_code": "hsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Icelandic Sign Language", "iso_1_code": null, "iso_3_code": "icl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8806", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Indonesian Sign Language", "iso_1_code": null, "iso_3_code": "inl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8807", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Indian Sign Language", "iso_1_code": null, "iso_3_code": "ins", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8808", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Italian Sign Language", "iso_1_code": null, "iso_3_code": "ise", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8809", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Irish Sign Language", "iso_1_code": null, "iso_3_code": "isg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Israeli Sign Language", "iso_1_code": null, "iso_3_code": "isr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8811", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jamaican Sign Language", "iso_1_code": null, "iso_3_code": "jls", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jordanian Sign Language", "iso_1_code": null, "iso_3_code": "jos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8813", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Japanese Sign Language", "iso_1_code": null, "iso_3_code": "jsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8814", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Selangor Sign Language", "iso_1_code": null, "iso_3_code": "kgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korean Sign Language", "iso_1_code": null, "iso_3_code": "kvk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8816", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Libyan Sign Language", "iso_1_code": null, "iso_3_code": "lbs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8817", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guinea-Bissau Sign Language", "iso_1_code": null, "iso_3_code": "lgs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lithuanian Sign Language", "iso_1_code": null, "iso_3_code": "lls", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burundian Sign Language", "iso_1_code": null, "iso_3_code": "lsb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Latvian Sign Language", "iso_1_code": null, "iso_3_code": "lsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tibetan Sign Language", "iso_1_code": null, "iso_3_code": "lsn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8822", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laos Sign Language", "iso_1_code": null, "iso_3_code": "lso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Panamanian Sign Language", "iso_1_code": null, "iso_3_code": "lsp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Trinidad and Tobago Sign Language", "iso_1_code": null, "iso_3_code": "lst", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8825", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seychelles Sign Language", "iso_1_code": null, "iso_3_code": "lsw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mauritian Sign Language", "iso_1_code": null, "iso_3_code": "lsy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malawian Sign Language", "iso_1_code": null, "iso_3_code": "lws", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maltese Sign Language", "iso_1_code": null, "iso_3_code": "mdl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mexican Sign Language", "iso_1_code": null, "iso_3_code": "mfs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mongolian Sign Language", "iso_1_code": null, "iso_3_code": "msr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy Sign Language", "iso_1_code": null, "iso_3_code": "mzc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8832", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mozambican Sign Language", "iso_1_code": null, "iso_3_code": "mzy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Namibian Sign Language", "iso_1_code": null, "iso_3_code": "nbs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nicaraguan Sign Language", "iso_1_code": null, "iso_3_code": "ncs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nigerian Sign Language", "iso_1_code": null, "iso_3_code": "nsi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8836", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Norwegian Sign Language", "iso_1_code": null, "iso_3_code": "nsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nepalese Sign Language", "iso_1_code": null, "iso_3_code": "nsp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8838", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maritime Sign Language", "iso_1_code": null, "iso_3_code": "nsr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "New Zealand Sign Language", "iso_1_code": null, "iso_3_code": "nzs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papua New Guinean Sign Language", "iso_1_code": null, "iso_3_code": "pgz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8841", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pakistan Sign Language", "iso_1_code": null, "iso_3_code": "pks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Peruvian Sign Language", "iso_1_code": null, "iso_3_code": "prl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iranian Sign Language", "iso_1_code": null, "iso_3_code": "psc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8844", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Penang Sign Language", "iso_1_code": null, "iso_3_code": "psg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puerto Rican Sign Language", "iso_1_code": null, "iso_3_code": "psl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Polish Sign Language", "iso_1_code": null, "iso_3_code": "pso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Filipino Sign Language", "iso_1_code": null, "iso_3_code": "psp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Portuguese Sign Language", "iso_1_code": null, "iso_3_code": "psr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paraguayan Sign Language", "iso_1_code": null, "iso_3_code": "pys", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Romanian Sign Language", "iso_1_code": null, "iso_3_code": "rms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Russian Sign Language", "iso_1_code": null, "iso_3_code": "rsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8852", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rwandan Sign Language", "iso_1_code": null, "iso_3_code": "rsn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8853", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saudi Arabian Sign Language", "iso_1_code": null, "iso_3_code": "sdl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "French Belgian Sign Language", "iso_1_code": null, "iso_3_code": "sfb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8855", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South African Sign Language", "iso_1_code": null, "iso_3_code": "sfs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Swiss-German Sign Language", "iso_1_code": null, "iso_3_code": "sgg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8857", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sierra Leone Sign Language", "iso_1_code": null, "iso_3_code": "sgx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8858", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Swiss-Italian Sign Language", "iso_1_code": null, "iso_3_code": "slf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8859", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Singapore Sign Language", "iso_1_code": null, "iso_3_code": "sls", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8860", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Albanian Sign Language", "iso_1_code": null, "iso_3_code": "sqk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8861", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sri Lankan Sign Language", "iso_1_code": null, "iso_3_code": "sqs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Spanish Sign Language", "iso_1_code": null, "iso_3_code": "ssp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Swiss-French Sign Language", "iso_1_code": null, "iso_3_code": "ssr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Slovakian Sign Language", "iso_1_code": null, "iso_3_code": "svk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8865", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Swedish Sign Language", "iso_1_code": null, "iso_3_code": "swl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8866", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Solomon Islands Sign Language", "iso_1_code": null, "iso_3_code": "szs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunisian Sign Language", "iso_1_code": null, "iso_3_code": "tse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Turkish Sign Language", "iso_1_code": null, "iso_3_code": "tsm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thai Sign Language", "iso_1_code": null, "iso_3_code": "tsq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taiwan Sign Language", "iso_1_code": null, "iso_3_code": "tss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8871", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanzanian Sign Language", "iso_1_code": null, "iso_3_code": "tza", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8872", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ugandan Sign Language", "iso_1_code": null, "iso_3_code": "ugn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8873", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uruguayan Sign Language", "iso_1_code": null, "iso_3_code": "ugy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ukrainian Sign Language", "iso_1_code": null, "iso_3_code": "ukl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8875", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Flemish Sign Language", "iso_1_code": null, "iso_3_code": "vgt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moldova Sign Language", "iso_1_code": null, "iso_3_code": "vsi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8877", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Venezuelan Sign Language", "iso_1_code": null, "iso_3_code": "vsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8878", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Valencian Sign Language", "iso_1_code": null, "iso_3_code": "vsv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8879", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Bengal Sign Language", "iso_1_code": null, "iso_3_code": "wbs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8880", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kenyan Sign Language", "iso_1_code": null, "iso_3_code": "xki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8881", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malaysian Sign Language", "iso_1_code": null, "iso_3_code": "xml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8882", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moroccan Sign Language", "iso_1_code": null, "iso_3_code": "xms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8883", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Slovenian Sign Language", "iso_1_code": null, "iso_3_code": "ysl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8884", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Myanmar Sign Language", "iso_1_code": null, "iso_3_code": "ysm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8885", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zimbabwe Sign Language", "iso_1_code": null, "iso_3_code": "zib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8886", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zambian Sign Language", "iso_1_code": null, "iso_3_code": "zsl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8887", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shared sign language", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Adamorobe Sign Language", "iso_1_code": null, "iso_3_code": "ads", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8889", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Algerian Jewish Sign Language", "iso_1_code": null, "iso_3_code": "ajs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8890", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Australian Aborigines Sign Language", "iso_1_code": null, "iso_3_code": "asw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8891", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ban Khor Sign Language", "iso_1_code": null, "iso_3_code": "bfk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8892", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kata Kolok", "iso_1_code": null, "iso_3_code": "bqy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8893", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mardin Sign Language", "iso_1_code": null, "iso_3_code": "dsz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8894", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miyakubo Sign Language", "iso_1_code": null, "iso_3_code": "ehs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8895", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ghandruk Sign Language", "iso_1_code": null, "iso_3_code": "gds", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8896", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inuit Sign Language", "iso_1_code": null, "iso_3_code": "iks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8897", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konchri Sain", "iso_1_code": null, "iso_3_code": "jcs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jhyankot Sign Language", "iso_1_code": null, "iso_3_code": "jhs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amami Koniya Sign Language", "iso_1_code": null, "iso_3_code": "jks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jumli Sign Language", "iso_1_code": null, "iso_3_code": "jus", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8901", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Albarradas Sign Language", "iso_1_code": null, "iso_3_code": "lsc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8902", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sivia Sign Language", "iso_1_code": null, "iso_3_code": "lsv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8903", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Martha\u2019s Vineyard Sign Language", "iso_1_code": null, "iso_3_code": "mre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8904", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yucatec Maya Sign Language", "iso_1_code": null, "iso_3_code": "msd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8905", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Old Kentish Sign Language", "iso_1_code": null, "iso_3_code": "okl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Providencia Sign Language", "iso_1_code": null, "iso_3_code": "prz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Plains Indian Sign Language", "iso_1_code": null, "iso_3_code": "psd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8908", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bribri Sign Language", "iso_1_code": null, "iso_3_code": "rib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8909", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Brunca Sign Language", "iso_1_code": null, "iso_3_code": "rnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8910", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miriwoong Sign Language", "iso_1_code": null, "iso_3_code": "rsm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8911", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kufr Qassem Sign Language", "iso_1_code": null, "iso_3_code": "sqx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8912", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Al-Sayyid Bedouin Sign Language", "iso_1_code": null, "iso_3_code": "syy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tebul Sign Language", "iso_1_code": null, "iso_3_code": "tsy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8914", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaapor Sign Language", "iso_1_code": null, "iso_3_code": "uks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yolngu Sign Language", "iso_1_code": null, "iso_3_code": "ygs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yan-nhangu Sign Language", "iso_1_code": null, "iso_3_code": "yhs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8888", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8755", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Sino-Tibetan.json b/data/Sino-Tibetan.json index 34933acb8324d0b53b0bfca66d9f70010d174200..4f919b5d55b89817d842f5bc4b5feb08f5a3e65b 100644 --- a/data/Sino-Tibetan.json +++ b/data/Sino-Tibetan.json @@ -2,8240 +2,6230 @@ "name": "Sino-Tibetan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chinese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chinese, Min Dong", "iso_1_code": "zh", "iso_3_code": "cdo", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8920", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese, Jinyu", "iso_1_code": "zh", "iso_3_code": "cjy", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8921", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese, Mandarin", "iso_1_code": "zh", "iso_3_code": "cmn", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"lzh\")", + "original_lang_name": "literary_chinese", + "original_lang_code": "lzh", + "script": "Hani", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "8922", + "native_tokenizers": [], "scripts": [ "Hani" - ], - "own_tokenizer": true + ] }, { "name": "Pinghua, Northern", "iso_1_code": "zh", "iso_3_code": "cnp", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8923", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese, Pu-Xian", "iso_1_code": "zh", "iso_3_code": "cpx", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8924", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Pinghua, Southern", "iso_1_code": "zh", "iso_3_code": "csp", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8925", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese, Huizhou", "iso_1_code": "zh", "iso_3_code": "czh", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8926", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese, Min Zhong", "iso_1_code": "zh", "iso_3_code": "czo", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8927", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Dungan", "iso_1_code": null, "iso_3_code": "dng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8928", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Chinese, Gan", "iso_1_code": "zh", "iso_3_code": "gan", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8929", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese, Hakka", "iso_1_code": "zh", "iso_3_code": "hak", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"lzh\")", + "original_lang_name": "literary_chinese", + "original_lang_code": "lzh", + "script": "Hani", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "8930", + "native_tokenizers": [], "scripts": [ "Latn", "Hani" - ], - "own_tokenizer": true + ] }, { "name": "Chinese, Xiang", "iso_1_code": "zh", "iso_3_code": "hsn", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8931", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese, Classical", "iso_1_code": "zh", "iso_3_code": "lzh", + "children": [], "tokenizers": { "Hani": { "full_object": "StanzaTokenizer(\"lzh\")", "original_lang_name": "literary_chinese", "original_lang_code": "lzh", - "scripts": [ - "Hani" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Hani", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "8932", - "scripts": [ + "native_tokenizers": [ "Hani" ], - "own_tokenizer": true + "scripts": [ + "Hani" + ] }, { "name": "Chinese, Min Bei", "iso_1_code": "zh", "iso_3_code": "mnp", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8933", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese, Min Nan", "iso_1_code": "zh", "iso_3_code": "nan", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8934", - "scripts": [ - "Latn", - "Hani" - ], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Chinese, Wu", "iso_1_code": "zh", "iso_3_code": "wuu", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"lzh\")", + "original_lang_name": "literary_chinese", + "original_lang_code": "lzh", + "script": "Hani", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "8935", + "native_tokenizers": [], "scripts": [ "Hani" - ], - "own_tokenizer": true + ] }, { "name": "Chinese, Yue", "iso_1_code": "zh", "iso_3_code": "yue", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"lzh\")", + "original_lang_name": "literary_chinese", + "original_lang_code": "lzh", + "script": "Hani", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "8936", + "native_tokenizers": [], "scripts": [ "Hani" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Hani": { + "full_object": "StanzaTokenizer(\"lzh\")", + "original_lang_name": "literary_chinese", + "original_lang_code": "lzh", + "script": "Hani", + "class_name": "StanzaTokenizer" + } + }, "node_i": "8919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tibeto-Burman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Meitei", "iso_1_code": null, "iso_3_code": "mni", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8938", + "native_tokenizers": [], "scripts": [ "Latn", "Beng", "Mtei" - ], - "own_tokenizer": false + ] }, { "name": "Angami-Pochuri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Naga, Mao", "iso_1_code": null, "iso_3_code": "nbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Angami", "iso_1_code": null, "iso_3_code": "njm", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8941", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Khezha", "iso_1_code": null, "iso_3_code": "nkh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Northern Rengma", "iso_1_code": null, "iso_3_code": "nnl", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8943", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Pochuri", "iso_1_code": null, "iso_3_code": "npo", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8944", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Southern Rengma", "iso_1_code": null, "iso_3_code": "nre", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8945", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Chokri", "iso_1_code": null, "iso_3_code": "nri", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8946", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Sumi", "iso_1_code": null, "iso_3_code": "nsm", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8947", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Poumai", "iso_1_code": null, "iso_3_code": "pmx", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8948", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8939", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Naga, Lotha", "iso_1_code": null, "iso_3_code": "njh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8950", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Ao", "iso_1_code": null, "iso_3_code": "njo", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8951", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Sangtam", "iso_1_code": null, "iso_3_code": "nsa", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8952", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Yimchungru", "iso_1_code": null, "iso_3_code": "yim", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8953", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Tibeto-Burman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Digarish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Idu-Mishmi", "iso_1_code": null, "iso_3_code": "clk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8956", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Digaro-Mishmi", "iso_1_code": null, "iso_3_code": "mhu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8957", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8955", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hrusish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hruso", "iso_1_code": null, "iso_3_code": "hru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miji", "iso_1_code": null, "iso_3_code": "sjl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8958", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Keman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zakhring", "iso_1_code": null, "iso_3_code": "zkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8962", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8961", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kho-Bwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bugun", "iso_1_code": null, "iso_3_code": "bgg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8964", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chug", "iso_1_code": null, "iso_3_code": "cvg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8965", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lish", "iso_1_code": null, "iso_3_code": "lsh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8966", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sartang", "iso_1_code": null, "iso_3_code": "onp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8967", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sherdukpen", "iso_1_code": null, "iso_3_code": "sdp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8968", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puroik", "iso_1_code": null, "iso_3_code": "suv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8963", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lepcha", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lepcha", "iso_1_code": null, "iso_3_code": "lep", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8971", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mijish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Miju-Mishmi", "iso_1_code": null, "iso_3_code": "mxj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8973", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8972", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nungish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Drung", "iso_1_code": null, "iso_3_code": "duu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8975", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Anong", "iso_1_code": null, "iso_3_code": "nun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8976", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rawang", "iso_1_code": null, "iso_3_code": "raw", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8977", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Adi", "iso_1_code": null, "iso_3_code": "adi", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8979", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Adi, Galo", "iso_1_code": null, "iso_3_code": "adl", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8980", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Apatani", "iso_1_code": null, "iso_3_code": "apt", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8981", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mising", "iso_1_code": null, "iso_3_code": "mrg", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8982", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Na", "iso_1_code": null, "iso_3_code": "nbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8983", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyishi", "iso_1_code": null, "iso_3_code": "njz", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8984", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tagin", "iso_1_code": null, "iso_3_code": "tgj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8985", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8978", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8954", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karbi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Amri Karbi", "iso_1_code": null, "iso_3_code": "ajz", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8987", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Karbi", "iso_1_code": null, "iso_3_code": "mjw", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8988", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8986", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karenic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Karen, Bwe", "iso_1_code": null, "iso_3_code": "bwe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8991", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayah, Eastern", "iso_1_code": null, "iso_3_code": "eky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karen, Geko", "iso_1_code": null, "iso_3_code": "ghk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8993", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayaw", "iso_1_code": null, "iso_3_code": "kvl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karen, Geba", "iso_1_code": null, "iso_3_code": "kvq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8995", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kawyaw", "iso_1_code": null, "iso_3_code": "kxf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8996", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayah, Western", "iso_1_code": null, "iso_3_code": "kyu", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "8997", + "native_tokenizers": [], "scripts": [ "Latn", "Kali", "Mymr" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "8990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lahta", "iso_1_code": null, "iso_3_code": "kvt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "8999", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yinbaw", "iso_1_code": null, "iso_3_code": "kvu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yintale", "iso_1_code": null, "iso_3_code": "kvy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9001", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zayein", "iso_1_code": null, "iso_3_code": "kxk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9002", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan", "iso_1_code": null, "iso_3_code": "pdu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9003", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8998", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Peripheral", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pa\u2019o", "iso_1_code": null, "iso_3_code": "blk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9005", + "native_tokenizers": [], "scripts": [ "Mymr" - ], - "own_tokenizer": false + ] }, { "name": "Karen, Pwo Eastern", "iso_1_code": null, "iso_3_code": "kjp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9006", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karen, Phrae Pwo", "iso_1_code": null, "iso_3_code": "kjt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9007", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karen, Pwo Western", "iso_1_code": null, "iso_3_code": "pwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9008", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karen, Pwo Northern", "iso_1_code": null, "iso_3_code": "pww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9009", + "native_tokenizers": [], "scripts": [ "Thai" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karen, Mobwa", "iso_1_code": null, "iso_3_code": "jkm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9011", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karen, Paku", "iso_1_code": null, "iso_3_code": "jkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9012", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karen, S\u2019gaw", "iso_1_code": null, "iso_3_code": "ksw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9013", + "native_tokenizers": [], "scripts": [ "Mymr" - ], - "own_tokenizer": false + ] }, { "name": "Wewaw", "iso_1_code": null, "iso_3_code": "wea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9014", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "8989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuki-Chin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chin, Thaiphum", "iso_1_code": null, "iso_3_code": "cth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9016", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chin, Ngawn", "iso_1_code": null, "iso_3_code": "cnw", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9018", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pangkhua", "iso_1_code": null, "iso_3_code": "pkh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9019", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Tawr", "iso_1_code": null, "iso_3_code": "tcp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chin, Bawm", "iso_1_code": null, "iso_3_code": "bgr", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9022", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chin, Bualkhaw", "iso_1_code": null, "iso_3_code": "cbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9023", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Falam", "iso_1_code": null, "iso_3_code": "cfm", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9024", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chin, Hakha", "iso_1_code": null, "iso_3_code": "cnh", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9025", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9021", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mizo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Biate", "iso_1_code": null, "iso_3_code": "biu", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9027", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Hmar", "iso_1_code": null, "iso_3_code": "hmr", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9028", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Hrangkhol", "iso_1_code": null, "iso_3_code": "hra", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9029", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mizo", "iso_1_code": null, "iso_3_code": "lus", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9030", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sakachep", "iso_1_code": null, "iso_3_code": "sch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9026", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maraic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chin, Zotung", "iso_1_code": null, "iso_3_code": "czt", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9033", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chin, Senthang", "iso_1_code": null, "iso_3_code": "sez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9034", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Zyphe", "iso_1_code": null, "iso_3_code": "zyp", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9035", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chin, Lautu", "iso_1_code": null, "iso_3_code": "clt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9037", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mara", "iso_1_code": null, "iso_3_code": "mrh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northwestern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aimol", "iso_1_code": null, "iso_3_code": "aim", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9040", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Anal", "iso_1_code": null, "iso_3_code": "anm", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9041", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chiru", "iso_1_code": null, "iso_3_code": "cdf", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9042", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Kharam", "iso_1_code": null, "iso_3_code": "kfw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9043", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kom", "iso_1_code": null, "iso_3_code": "kmm", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9044", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lamkang", "iso_1_code": null, "iso_3_code": "lmk", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9045", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Chothe", "iso_1_code": null, "iso_3_code": "nct", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9046", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Monsang", "iso_1_code": null, "iso_3_code": "nmh", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9047", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Tarao", "iso_1_code": null, "iso_3_code": "tro", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9048", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Peripheral", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Naga, Moyon", "iso_1_code": null, "iso_3_code": "nmo", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9051", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Purum", "iso_1_code": null, "iso_3_code": "pub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9052", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ralte", "iso_1_code": null, "iso_3_code": "ral", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Simte", "iso_1_code": null, "iso_3_code": "smt", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9054", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sizang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chin, Siyin", "iso_1_code": null, "iso_3_code": "csy", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9056", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gangte", "iso_1_code": null, "iso_3_code": "gnb", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9057", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Vaiphei", "iso_1_code": null, "iso_3_code": "vap", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9058", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zo", "iso_1_code": null, "iso_3_code": "zom", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9059", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thado", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chin, Tedim", "iso_1_code": null, "iso_3_code": "ctd", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9061", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chin, Paite", "iso_1_code": null, "iso_3_code": "pck", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9062", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chin, Thado", "iso_1_code": null, "iso_3_code": "tcz", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9063", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chin, Eastern Khumi", "iso_1_code": null, "iso_3_code": "cek", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9065", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mro-Khimi", "iso_1_code": null, "iso_3_code": "cmr", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9066", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chin, Khumi", "iso_1_code": null, "iso_3_code": "cnk", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9067", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chin, Songlai", "iso_1_code": null, "iso_3_code": "csj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9068", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Sumtu", "iso_1_code": null, "iso_3_code": "csv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9069", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Rungtu", "iso_1_code": null, "iso_3_code": "rtc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9070", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shendu", "iso_1_code": null, "iso_3_code": "shl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9071", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Rawngtu", "iso_1_code": null, "iso_3_code": "weu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9072", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cho-Asho", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Asho", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chin, L\u00e4okt\u00fc", "iso_1_code": null, "iso_3_code": "cey", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9075", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Laitu", "iso_1_code": null, "iso_3_code": "clj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9076", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Asho", "iso_1_code": null, "iso_3_code": "csh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9077", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9074", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cho", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chin, Kaang", "iso_1_code": null, "iso_3_code": "ckn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Uppu", "iso_1_code": null, "iso_3_code": "cnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9080", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Daai", "iso_1_code": null, "iso_3_code": "dao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9081", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chin, Matu", "iso_1_code": null, "iso_3_code": "hlt", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9082", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chin, M\u00fc\u00fcn", "iso_1_code": null, "iso_3_code": "mwq", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9083", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9078", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9073", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9064", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9049", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngwi-Burmese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Burmish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Achang", "iso_1_code": null, "iso_3_code": "acn", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9087", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zaiwa", "iso_1_code": null, "iso_3_code": "atb", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9088", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pela", "iso_1_code": null, "iso_3_code": "bxd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9089", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hpon", "iso_1_code": null, "iso_3_code": "hpo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lacid", "iso_1_code": null, "iso_3_code": "lsi", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9091", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lhao Vo", "iso_1_code": null, "iso_3_code": "mhx", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9092", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9086", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Danu", "iso_1_code": null, "iso_3_code": "dnv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Intha", "iso_1_code": null, "iso_3_code": "int", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9095", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burmese", "iso_1_code": "my", "iso_3_code": "mya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9096", + "native_tokenizers": [], "scripts": [ "Mymr" - ], - "own_tokenizer": false + ] }, { "name": "Rakhine", "iso_1_code": null, "iso_3_code": "rki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9097", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marma", "iso_1_code": null, "iso_3_code": "rmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9098", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taungyo", "iso_1_code": null, "iso_3_code": "tco", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9099", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tavoyan", "iso_1_code": null, "iso_3_code": "tvn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9093", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9085", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chin, Anu-Khongso", "iso_1_code": null, "iso_3_code": "anl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9102", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mru", "iso_1_code": null, "iso_3_code": "mro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9103", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngwi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hlersu", "iso_1_code": null, "iso_3_code": "hle", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jinuo, Youle", "iso_1_code": null, "iso_3_code": "jiu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jinuo, Buyuan", "iso_1_code": null, "iso_3_code": "jiy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9108", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lahu Shi", "iso_1_code": null, "iso_3_code": "lhi", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9109", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lahu", "iso_1_code": null, "iso_3_code": "lhu", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9110", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lisu", "iso_1_code": null, "iso_3_code": "lis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9111", + "native_tokenizers": [], "scripts": [ "Lisu" - ], - "own_tokenizer": false + ] }, { "name": "Kucong", "iso_1_code": null, "iso_3_code": "lkc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9112", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamu", "iso_1_code": null, "iso_3_code": "llh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9113", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lipo", "iso_1_code": null, "iso_3_code": "lpo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lawu", "iso_1_code": null, "iso_3_code": "lwu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9115", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nusu", "iso_1_code": null, "iso_3_code": "nuf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9116", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lolopo", "iso_1_code": null, "iso_3_code": "ycl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lalo, Dongshanba", "iso_1_code": null, "iso_3_code": "yik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miqie", "iso_1_code": null, "iso_3_code": "yiq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lalu, Eastern", "iso_1_code": null, "iso_3_code": "yit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Limi", "iso_1_code": null, "iso_3_code": "ylm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mili", "iso_1_code": null, "iso_3_code": "ymh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lang\u2019e", "iso_1_code": null, "iso_3_code": "yne", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sani", "iso_1_code": null, "iso_3_code": "ysn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lolopo, Southern", "iso_1_code": null, "iso_3_code": "ysp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9125", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Talu", "iso_1_code": null, "iso_3_code": "yta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanglang", "iso_1_code": null, "iso_3_code": "ytl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lalu, Western", "iso_1_code": null, "iso_3_code": "ywl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lalo, Central", "iso_1_code": null, "iso_3_code": "ywt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zauzou", "iso_1_code": null, "iso_3_code": "zal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nuosu", "iso_1_code": "ii", "iso_3_code": "iii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Katso", "iso_1_code": null, "iso_3_code": "kaf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9133", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samei", "iso_1_code": null, "iso_3_code": "smh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chesu", "iso_1_code": null, "iso_3_code": "ych", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gepo", "iso_1_code": null, "iso_3_code": "ygp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9136", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nasu, Wusa", "iso_1_code": null, "iso_3_code": "yig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awu", "iso_1_code": null, "iso_3_code": "yiu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naluo", "iso_1_code": null, "iso_3_code": "ylo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9139", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aluo", "iso_1_code": null, "iso_3_code": "yna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samatao", "iso_1_code": null, "iso_3_code": "ysd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9141", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanie", "iso_1_code": null, "iso_3_code": "ysy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yi, Wuding-Luquan", "iso_1_code": null, "iso_3_code": "ywq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9143", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nasu, Wumeng", "iso_1_code": null, "iso_3_code": "ywu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9144", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayizi", "iso_1_code": null, "iso_3_code": "yyz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9145", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alugu", "iso_1_code": null, "iso_3_code": "aub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9147", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Azha", "iso_1_code": null, "iso_3_code": "aza", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laghuu", "iso_1_code": null, "iso_3_code": "lgh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9149", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nisu, Eastern", "iso_1_code": null, "iso_3_code": "nos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9150", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nisu, Southern", "iso_1_code": null, "iso_3_code": "nsd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nisu, Northwestern", "iso_1_code": null, "iso_3_code": "nsf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9152", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nisu, Southwestern", "iso_1_code": null, "iso_3_code": "nsv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9153", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mantsi", "iso_1_code": null, "iso_3_code": "nty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9154", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phula", "iso_1_code": null, "iso_3_code": "phh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bokha", "iso_1_code": null, "iso_3_code": "ybk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9156", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phowa, Hlepho", "iso_1_code": null, "iso_3_code": "yhl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ache", "iso_1_code": null, "iso_3_code": "yif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pholo", "iso_1_code": null, "iso_3_code": "yip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nisu, Northern", "iso_1_code": null, "iso_3_code": "yiv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9160", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Axi", "iso_1_code": null, "iso_3_code": "yix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9161", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Azhe", "iso_1_code": null, "iso_3_code": "yiz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khlula", "iso_1_code": null, "iso_3_code": "ykl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kua-nsi", "iso_1_code": null, "iso_3_code": "ykn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9164", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kathu", "iso_1_code": null, "iso_3_code": "ykt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuamasi", "iso_1_code": null, "iso_3_code": "yku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9166", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muji, Southern", "iso_1_code": null, "iso_3_code": "ymc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9167", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moji", "iso_1_code": null, "iso_3_code": "ymi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muji, Qila", "iso_1_code": null, "iso_3_code": "ymq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muji, Northern", "iso_1_code": null, "iso_3_code": "ymx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9170", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muzi", "iso_1_code": null, "iso_3_code": "ymz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phala", "iso_1_code": null, "iso_3_code": "ypa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phowa, Labo", "iso_1_code": null, "iso_3_code": "ypb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9173", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phola", "iso_1_code": null, "iso_3_code": "ypg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phupha", "iso_1_code": null, "iso_3_code": "yph", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9175", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phuma", "iso_1_code": null, "iso_3_code": "ypm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9176", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phowa, Ani", "iso_1_code": null, "iso_3_code": "ypn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phola, Alo", "iso_1_code": null, "iso_3_code": "ypo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phupa", "iso_1_code": null, "iso_3_code": "ypp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9179", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phuza", "iso_1_code": null, "iso_3_code": "ypz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9180", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sonaga", "iso_1_code": null, "iso_3_code": "ysg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nisi", "iso_1_code": null, "iso_3_code": "yso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9182", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thopho", "iso_1_code": null, "iso_3_code": "ytp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zokhuo", "iso_1_code": null, "iso_3_code": "yzk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Akeu", "iso_1_code": null, "iso_3_code": "aeu", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9186", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Akha", "iso_1_code": null, "iso_3_code": "ahk", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9187", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Biyo", "iso_1_code": null, "iso_3_code": "byo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "C\u00f4\u00f4ng", "iso_1_code": null, "iso_3_code": "cnc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Enu", "iso_1_code": null, "iso_3_code": "enu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hani", "iso_1_code": null, "iso_3_code": "hni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Honi", "iso_1_code": null, "iso_3_code": "how", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaduo", "iso_1_code": null, "iso_3_code": "ktp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9193", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lopi", "iso_1_code": null, "iso_3_code": "lov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mpi", "iso_1_code": null, "iso_3_code": "mpz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phana\u2019", "iso_1_code": null, "iso_3_code": "phq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9196", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sangkong", "iso_1_code": null, "iso_3_code": "sgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sila", "iso_1_code": null, "iso_3_code": "slt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chepya", "iso_1_code": null, "iso_3_code": "ycp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muda", "iso_1_code": null, "iso_3_code": "ymd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bisoid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bisu", "iso_1_code": null, "iso_3_code": "bzi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9202", + "native_tokenizers": [], "scripts": [ "Thai" - ], - "own_tokenizer": false + ] }, { "name": "Laomian", "iso_1_code": null, "iso_3_code": "lwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9203", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phunoi", "iso_1_code": null, "iso_3_code": "pho", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pyen", "iso_1_code": null, "iso_3_code": "pyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Laopang", "iso_1_code": null, "iso_3_code": "lbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ugong", "iso_1_code": null, "iso_3_code": "ugo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northeastern Tibeto-Burman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bai, Central", "iso_1_code": null, "iso_3_code": "bca", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bai, Panyi", "iso_1_code": null, "iso_3_code": "bfc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bai, Southern", "iso_1_code": null, "iso_3_code": "bfs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9213", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bai, Lama", "iso_1_code": null, "iso_3_code": "lay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baima", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baima", "iso_1_code": null, "iso_3_code": "bqh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9216", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ersuish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ersu", "iso_1_code": null, "iso_3_code": "ers", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9217", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Namuyi", "iso_1_code": null, "iso_3_code": "nmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9220", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Narua", "iso_1_code": null, "iso_3_code": "nru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9221", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naxi", "iso_1_code": null, "iso_3_code": "nxq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shuhi", "iso_1_code": null, "iso_3_code": "sxg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9219", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Qiangic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Qiang, Northern", "iso_1_code": null, "iso_3_code": "cng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minyag, Eastern", "iso_1_code": null, "iso_3_code": "emq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guiqiong", "iso_1_code": null, "iso_3_code": "gqi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pumi, Northern", "iso_1_code": null, "iso_3_code": "pmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pumi, Southern", "iso_1_code": null, "iso_3_code": "pmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9229", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Queyu", "iso_1_code": null, "iso_3_code": "qvy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Qiang, Southern", "iso_1_code": null, "iso_3_code": "qxs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minyag, Western", "iso_1_code": null, "iso_3_code": "wmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9232", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zhaba", "iso_1_code": null, "iso_3_code": "zhb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9233", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9224", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "rGyalrongic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Horpa", "iso_1_code": null, "iso_3_code": "ero", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "sTodsde", "iso_1_code": null, "iso_3_code": "jih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lavrung", "iso_1_code": null, "iso_3_code": "jiq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jiarong", "iso_1_code": null, "iso_3_code": "jya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9238", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9234", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tujia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tujia, Northern", "iso_1_code": null, "iso_3_code": "tji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tujia, Southern", "iso_1_code": null, "iso_3_code": "tjs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9241", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Boro-Garo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Deori", "iso_1_code": null, "iso_3_code": "der", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Garo", "iso_1_code": null, "iso_3_code": "grt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9245", + "native_tokenizers": [], "scripts": [ "Beng" - ], - "own_tokenizer": false + ] }, { "name": "Megam", "iso_1_code": null, "iso_3_code": "mef", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tippera", "iso_1_code": null, "iso_3_code": "tpe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Boro-Tiwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tiwa", "iso_1_code": null, "iso_3_code": "lax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Boro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Boro", "iso_1_code": null, "iso_3_code": "brx", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" } }, - "children": [], "node_i": "9251", + "native_tokenizers": [ + "Deva" + ], "scripts": [ "Latn", "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Kachari", "iso_1_code": null, "iso_3_code": "xac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9252", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dimasa-Kokborok", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dimasa", "iso_1_code": null, "iso_3_code": "dis", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9254", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kok Borok", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Riang", "iso_1_code": null, "iso_3_code": "ria", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9256", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kok Borok", "iso_1_code": null, "iso_3_code": "trp", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9257", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Usoi", "iso_1_code": null, "iso_3_code": "usi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9255", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koch", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Atong", "iso_1_code": null, "iso_3_code": "aot", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koch", "iso_1_code": null, "iso_3_code": "kdq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rabha", "iso_1_code": null, "iso_3_code": "rah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ruga", "iso_1_code": null, "iso_3_code": "ruh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9263", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9259", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Naga, Khiamniungan", "iso_1_code": null, "iso_3_code": "kix", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9265", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Lainong", "iso_1_code": null, "iso_3_code": "lzn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Chang", "iso_1_code": null, "iso_3_code": "nbc", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9267", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Konyak", "iso_1_code": null, "iso_3_code": "nbe", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9268", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Nocte", "iso_1_code": null, "iso_3_code": "njb", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9269", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Lao", "iso_1_code": null, "iso_3_code": "nlq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Wancho", "iso_1_code": null, "iso_3_code": "nnp", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9271", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Ponyo-Gongwang", "iso_1_code": null, "iso_3_code": "npg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Phom", "iso_1_code": null, "iso_3_code": "nph", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9273", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Chen-Kayu", "iso_1_code": null, "iso_3_code": "nqq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Tangshang", "iso_1_code": null, "iso_3_code": "nst", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9275", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Tutsa", "iso_1_code": null, "iso_3_code": "tvt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Paungnyuan", "iso_1_code": null, "iso_3_code": "umn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9277", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9264", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dhimalish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dhimal", "iso_1_code": null, "iso_3_code": "dhi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9279", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toto", "iso_1_code": null, "iso_3_code": "txo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jingppaw-Asakia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Asakian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chak", "iso_1_code": null, "iso_3_code": "ckh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9283", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kadu", "iso_1_code": null, "iso_3_code": "zkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9284", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanan", "iso_1_code": null, "iso_3_code": "zkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9285", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jingphaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Jingpho", "iso_1_code": null, "iso_3_code": "kac", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9287", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Singpho", "iso_1_code": null, "iso_3_code": "sgp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9288", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taman", "iso_1_code": null, "iso_3_code": "tcl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Turung", "iso_1_code": null, "iso_3_code": "try", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9290", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9242", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tangkhulic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Naga, Khoibu", "iso_1_code": null, "iso_3_code": "nkb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Tangkhul", "iso_1_code": null, "iso_3_code": "nmf", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9293", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Maring", "iso_1_code": null, "iso_3_code": "nng", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9294", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Tangkhul", "iso_1_code": null, "iso_3_code": "ntx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9291", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Darlong", "iso_1_code": null, "iso_3_code": "dln", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9297", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Koro", "iso_1_code": null, "iso_3_code": "jkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9298", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Makuri", "iso_1_code": null, "iso_3_code": "jmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9299", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khamba", "iso_1_code": null, "iso_3_code": "kbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Long Phuri", "iso_1_code": null, "iso_3_code": "lpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9301", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Inpui", "iso_1_code": null, "iso_3_code": "nkf", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9302", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Puimei", "iso_1_code": null, "iso_3_code": "npu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9303", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Akyaung Ari", "iso_1_code": null, "iso_3_code": "nqy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Kokak", "iso_1_code": null, "iso_3_code": "nxk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Jejara", "iso_1_code": null, "iso_3_code": "pzn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ranglong", "iso_1_code": null, "iso_3_code": "rnl", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9307", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Hani": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Naga, Rongmei", "iso_1_code": null, "iso_3_code": "nbu", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9309", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Liangmai", "iso_1_code": null, "iso_3_code": "njn", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9310", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Koireng", "iso_1_code": null, "iso_3_code": "nkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9311", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Thangal", "iso_1_code": null, "iso_3_code": "nki", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9312", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Maram", "iso_1_code": null, "iso_3_code": "nma", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9313", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naga, Mzieme", "iso_1_code": null, "iso_3_code": "nme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9314", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naga, Zeme", "iso_1_code": null, "iso_3_code": "nzm", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"zh\", {\"nlp\": {\"tokenizer\": {\"segmenter\": \"jieba\"}}})", - "original_lang_name": "chinese", - "original_lang_code": "zho", - "scripts": [ - "Latn", - "Hani" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "9315", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9308", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Tibeto-Burman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bodish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gongduk", "iso_1_code": null, "iso_3_code": "goe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Monpa, Kalaktang", "iso_1_code": null, "iso_3_code": "kkf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lhokpu", "iso_1_code": null, "iso_3_code": "lhp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Olekha", "iso_1_code": null, "iso_3_code": "ole", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tshangla", "iso_1_code": null, "iso_3_code": "tsj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Bodish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Choni", "iso_1_code": null, "iso_3_code": "cda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tseku", "iso_1_code": null, "iso_3_code": "tsk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amdo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tibetan, Amdo", "iso_1_code": null, "iso_3_code": "adx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tibetan, Central", "iso_1_code": "bo", "iso_3_code": "bod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9329", + "native_tokenizers": [], "scripts": [ "Tibt" - ], - "own_tokenizer": false + ] }, { "name": "gTsang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dolpo", "iso_1_code": null, "iso_3_code": "dre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gyalsumdo", "iso_1_code": null, "iso_3_code": "gyo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Humla", "iso_1_code": null, "iso_3_code": "hut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jirel", "iso_1_code": null, "iso_3_code": "jul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kyerung", "iso_1_code": null, "iso_3_code": "kgy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nubri", "iso_1_code": null, "iso_3_code": "kte", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9336", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lhomi", "iso_1_code": null, "iso_3_code": "lhm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lhowa", "iso_1_code": null, "iso_3_code": "loy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mugom-Karmarong", "iso_1_code": null, "iso_3_code": "muk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hyolmo", "iso_1_code": null, "iso_3_code": "scp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Syuba", "iso_1_code": null, "iso_3_code": "syw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tichurong", "iso_1_code": null, "iso_3_code": "tcn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsum", "iso_1_code": null, "iso_3_code": "ttz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sherpa", "iso_1_code": null, "iso_3_code": "xsr", - "tokenizers": {}, "children": [], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9344", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9330", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Brokkat", "iso_1_code": null, "iso_3_code": "bro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chocangacakha", "iso_1_code": null, "iso_3_code": "cgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dzongkha", "iso_1_code": "dz", "iso_3_code": "dzo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9348", + "native_tokenizers": [], "scripts": [ "Tibt" - ], - "own_tokenizer": false + ] }, { "name": "Groma", "iso_1_code": null, "iso_3_code": "gro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lakha", "iso_1_code": null, "iso_3_code": "lkh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lunanakha", "iso_1_code": null, "iso_3_code": "luk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Layakha", "iso_1_code": null, "iso_3_code": "lya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Brokpake", "iso_1_code": null, "iso_3_code": "sgt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sikkimese", "iso_1_code": null, "iso_3_code": "sip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9345", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jad", "iso_1_code": null, "iso_3_code": "jda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Stod Bhoti", "iso_1_code": null, "iso_3_code": "sbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Spiti Bhoti", "iso_1_code": null, "iso_3_code": "spt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9358", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khams", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tibetan, Khams", "iso_1_code": null, "iso_3_code": "khg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9359", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Naaba", "iso_1_code": null, "iso_3_code": "nao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Walungge", "iso_1_code": null, "iso_3_code": "ola", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9363", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Balti", "iso_1_code": null, "iso_3_code": "bft", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Changthang", "iso_1_code": null, "iso_3_code": "cna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ladakhi", "iso_1_code": null, "iso_3_code": "lbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9367", + "native_tokenizers": [], "scripts": [ "Tibt" - ], - "own_tokenizer": false + ] }, { "name": "Purig", "iso_1_code": null, "iso_3_code": "prx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zangskari", "iso_1_code": null, "iso_3_code": "zau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9369", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Bodish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dakpakha", "iso_1_code": null, "iso_3_code": "dka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Monpa, Tawang", "iso_1_code": null, "iso_3_code": "twm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9372", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bumthang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dzalakha", "iso_1_code": null, "iso_3_code": "dzl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9374", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bumthangkha", "iso_1_code": null, "iso_3_code": "kjz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9375", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyenkha", "iso_1_code": null, "iso_3_code": "neh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9376", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nupbikha", "iso_1_code": null, "iso_3_code": "npb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9377", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chalikha", "iso_1_code": null, "iso_3_code": "tgf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9378", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khengkha", "iso_1_code": null, "iso_3_code": "xkf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9379", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurtokha", "iso_1_code": null, "iso_3_code": "xkz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9373", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9370", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Bodish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dura", "iso_1_code": null, "iso_3_code": "drq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaike", "iso_1_code": null, "iso_3_code": "kzq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9383", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ghale", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ghale, Southern", "iso_1_code": null, "iso_3_code": "ghe", - "tokenizers": {}, "children": [], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9385", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Ghale, Northern", "iso_1_code": null, "iso_3_code": "ghh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuke", "iso_1_code": null, "iso_3_code": "ght", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9387", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9384", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gurung-Tamang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gurungic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chantyal", "iso_1_code": null, "iso_3_code": "chx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9390", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gurung", "iso_1_code": null, "iso_3_code": "gvr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyeshangte", "iso_1_code": null, "iso_3_code": "nmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nar Phu", "iso_1_code": null, "iso_3_code": "npa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9393", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seke", "iso_1_code": null, "iso_3_code": "skj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9394", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thakali", "iso_1_code": null, "iso_3_code": "ths", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9395", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9389", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tamang, Eastern", "iso_1_code": null, "iso_3_code": "taj", - "tokenizers": {}, "children": [], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9397", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Tamang, Western", "iso_1_code": null, "iso_3_code": "tdg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamang, Eastern Gorkha", "iso_1_code": null, "iso_3_code": "tge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9396", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Himalayish", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Almora", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Byangsi", "iso_1_code": null, "iso_3_code": "bee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chaudangsi", "iso_1_code": null, "iso_3_code": "cdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Darmiya", "iso_1_code": null, "iso_3_code": "drd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9404", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rangkas", "iso_1_code": null, "iso_3_code": "rgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9401", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinauri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gahri", "iso_1_code": null, "iso_3_code": "bfu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinnauri, Chitkuli", "iso_1_code": null, "iso_3_code": "cik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9408", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jangshung", "iso_1_code": null, "iso_3_code": "jna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9409", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinnauri", "iso_1_code": null, "iso_3_code": "kfk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pattani", "iso_1_code": null, "iso_3_code": "lae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9411", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tinani", "iso_1_code": null, "iso_3_code": "lbf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9412", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinnauri, Bhoti", "iso_1_code": null, "iso_3_code": "nes", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rongpo", "iso_1_code": null, "iso_3_code": "rnp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9414", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shumcho", "iso_1_code": null, "iso_3_code": "scu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9415", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sunam", "iso_1_code": null, "iso_3_code": "ssk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9416", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinnauri, Chhoyul", "iso_1_code": null, "iso_3_code": "tpq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanashi", "iso_1_code": null, "iso_3_code": "xns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9400", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9317", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Himalayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central Himalayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chepang-Bhujel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bhujel", "iso_1_code": null, "iso_3_code": "byh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9422", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chepang", "iso_1_code": null, "iso_3_code": "cdm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9421", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kham-Magar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kham", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kham, Gamal", "iso_1_code": null, "iso_3_code": "kgj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kham, Eastern Parbate", "iso_1_code": null, "iso_3_code": "kif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kham, Sheshi", "iso_1_code": null, "iso_3_code": "kip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kham, Western Parbate", "iso_1_code": null, "iso_3_code": "kjl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Magar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Magar, Eastern", "iso_1_code": null, "iso_3_code": "mgp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9431", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Magar, Western", "iso_1_code": null, "iso_3_code": "mrd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9430", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Newar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Newar", "iso_1_code": null, "iso_3_code": "new", - "tokenizers": {}, "children": [], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9434", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Pahari", "iso_1_code": null, "iso_3_code": "phj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Raute-Raji", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rawat", "iso_1_code": null, "iso_3_code": "jnl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Raute", "iso_1_code": null, "iso_3_code": "rau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9438", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Raji", "iso_1_code": null, "iso_3_code": "rji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9439", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thangmi-Baraamu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baram", "iso_1_code": null, "iso_3_code": "brd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thangmi", "iso_1_code": null, "iso_3_code": "thf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiranti", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Athpariya", "iso_1_code": null, "iso_3_code": "aph", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9445", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bantawa", "iso_1_code": null, "iso_3_code": "bap", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9446", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Belhariya", "iso_1_code": null, "iso_3_code": "byw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chhintang", "iso_1_code": null, "iso_3_code": "ctn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9448", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chhiling", "iso_1_code": null, "iso_3_code": "cur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chukwa", "iso_1_code": null, "iso_3_code": "cuw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9450", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mewahang, Eastern", "iso_1_code": null, "iso_3_code": "emg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulung", "iso_1_code": null, "iso_3_code": "kle", - "tokenizers": {}, "children": [], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9452", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Lohorung", "iso_1_code": null, "iso_3_code": "lbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9453", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Limbu", "iso_1_code": null, "iso_3_code": "lif", - "tokenizers": {}, "children": [], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9454", + "native_tokenizers": [], "scripts": [ "Deva", "Limb" - ], - "own_tokenizer": false + ] }, { "name": "Mugali", "iso_1_code": null, "iso_3_code": "lmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yamphu, Southern", "iso_1_code": null, "iso_3_code": "lrr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yakkha, Chhathare", "iso_1_code": null, "iso_3_code": "luu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9457", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nachiring", "iso_1_code": null, "iso_3_code": "ncd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phangduwali", "iso_1_code": null, "iso_3_code": "phw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puma", "iso_1_code": null, "iso_3_code": "pum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dungmali", "iso_1_code": null, "iso_3_code": "raa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9461", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chamling", "iso_1_code": null, "iso_3_code": "rab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mewahang, Western", "iso_1_code": null, "iso_3_code": "raf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saam", "iso_1_code": null, "iso_3_code": "raq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sampang", "iso_1_code": null, "iso_3_code": "rav", - "tokenizers": {}, "children": [], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9465", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Yakkha", "iso_1_code": null, "iso_3_code": "ybh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yamphu", "iso_1_code": null, "iso_3_code": "ybi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9467", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bahing", "iso_1_code": null, "iso_3_code": "bhj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9469", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dumi", "iso_1_code": null, "iso_3_code": "dus", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jerung", "iso_1_code": null, "iso_3_code": "jee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9471", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koyee", "iso_1_code": null, "iso_3_code": "kkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9472", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khaling", "iso_1_code": null, "iso_3_code": "klr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sunwar", "iso_1_code": null, "iso_3_code": "suz", - "tokenizers": {}, "children": [], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9474", + "native_tokenizers": [], "scripts": [ "Deva" - ], - "own_tokenizer": false + ] }, { "name": "Thulung", "iso_1_code": null, "iso_3_code": "tdh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tilung", "iso_1_code": null, "iso_3_code": "tij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wayu", "iso_1_code": null, "iso_3_code": "vay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9477", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wambule", "iso_1_code": null, "iso_3_code": "wme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9478", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "9316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "8937", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Hani": { + "full_object": "StanzaTokenizer(\"lzh\")", + "original_lang_name": "literary_chinese", + "original_lang_code": "lzh", + "script": "Hani", + "class_name": "StanzaTokenizer" + }, + "Deva": { + "full_object": "IndicNLPTokenizer(\"hi\")", + "original_lang_name": "bodo", + "original_lang_code": "brx", + "script": "Deva", + "class_name": "IndicNLPTokenizer" + } + }, "node_i": "8918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Siouan-Catawban.json b/data/Siouan-Catawban.json index fea9ff276f4f2180072520665805e71f41e67519..7ffa8e9410cae81a3cdb6361b347b0677ff99c96 100644 --- a/data/Siouan-Catawban.json +++ b/data/Siouan-Catawban.json @@ -2,290 +2,290 @@ "name": "Siouan-Catawban", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Catawban", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Catawba", "iso_1_code": null, "iso_3_code": "chc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Woccon", "iso_1_code": null, "iso_3_code": "xwc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siouan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mandan", "iso_1_code": null, "iso_3_code": "mhq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mississippi Valley-Ohio Valley Siouan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Michigamea", "iso_1_code": null, "iso_3_code": "cmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9486", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ofo", "iso_1_code": null, "iso_3_code": "ofo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9487", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tutelo", "iso_1_code": null, "iso_3_code": "tta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mississippi Valley Siouan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Biloxi", "iso_1_code": null, "iso_3_code": "bll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9490", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chiwere-Winnebago", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Iowa-Oto", "iso_1_code": null, "iso_3_code": "iow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ho-Chunk", "iso_1_code": null, "iso_3_code": "win", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dakota", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Assiniboine", "iso_1_code": null, "iso_3_code": "asb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9495", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dakota", "iso_1_code": null, "iso_3_code": "dak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9496", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lakota", "iso_1_code": null, "iso_3_code": "lkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9497", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Stoney", "iso_1_code": null, "iso_3_code": "sto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9498", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9494", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dhegihan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kansa", "iso_1_code": null, "iso_3_code": "ksk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9500", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Omaha-Ponca", "iso_1_code": null, "iso_3_code": "oma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9501", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Osage", "iso_1_code": null, "iso_3_code": "osa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Quapaw", "iso_1_code": null, "iso_3_code": "qua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9499", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Missouri River Siouan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Crow", "iso_1_code": null, "iso_3_code": "cro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hidatsa", "iso_1_code": null, "iso_3_code": "hid", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9504", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9483", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Skou.json b/data/Skou.json index 0449c2c0724438175e70f5b0ab0ab29c11e7932d..29b9c480f7c39d92e7b33171a012630c446139ea 100644 --- a/data/Skou.json +++ b/data/Skou.json @@ -2,231 +2,231 @@ "name": "Skou", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "I\u2019saka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "I\u2019saka", "iso_1_code": null, "iso_3_code": "ksi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Skou-Serra-Lagoon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nuclear Skou", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern Skou", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dumo", "iso_1_code": null, "iso_3_code": "vam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wutung", "iso_1_code": null, "iso_3_code": "wut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Skou", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Skou", "iso_1_code": null, "iso_3_code": "skv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Serra Hills", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lagoon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bauni", "iso_1_code": null, "iso_3_code": "bpe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9519", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bouni", "iso_1_code": null, "iso_3_code": "suo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uni", "iso_1_code": null, "iso_3_code": "uni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9518", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Puare", "iso_1_code": null, "iso_3_code": "pux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9523", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rawo-Main Serra", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Main Serra", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pin", "iso_1_code": null, "iso_3_code": "wmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9526", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rawo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rawo", "iso_1_code": null, "iso_3_code": "rwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Somahai.json b/data/Somahai.json index a7e4e9e211e82fbea472bfb68436f84744563af4..8d2388998225919a1d3bb7592d9b3ed4720f2482 100644 --- a/data/Somahai.json +++ b/data/Somahai.json @@ -2,30 +2,30 @@ "name": "Somahai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Momina", "iso_1_code": null, "iso_3_code": "mmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Momuna", "iso_1_code": null, "iso_3_code": "mqf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9529", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/South Bougainville.json b/data/South Bougainville.json index b5011f69002aa74c18a23d2ba36179e1c88c60e9..bd5f3a5ec5335142a90f163d99ae7306fea3ce02 100644 --- a/data/South Bougainville.json +++ b/data/South Bougainville.json @@ -2,124 +2,124 @@ "name": "South Bougainville", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Buin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Terei", "iso_1_code": null, "iso_3_code": "buo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9564", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Motuna", "iso_1_code": null, "iso_3_code": "siw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9565", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uisai", "iso_1_code": null, "iso_3_code": "uis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9566", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nasioi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Koromira", "iso_1_code": null, "iso_3_code": "kqj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9568", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Daantanai\u2019", "iso_1_code": null, "iso_3_code": "lni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9569", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Naasioi", "iso_1_code": null, "iso_3_code": "nas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9570", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sibe", "iso_1_code": null, "iso_3_code": "nco", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9571", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oune", "iso_1_code": null, "iso_3_code": "oue", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9572", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Simeku", "iso_1_code": null, "iso_3_code": "smz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9573", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9567", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/South-Central Papuan.json b/data/South-Central Papuan.json index da6d8757535adcd6d20f128232f774bb3ab2f40f..15e051ec84fd769a33aa59c38f74458820c39fab 100644 --- a/data/South-Central Papuan.json +++ b/data/South-Central Papuan.json @@ -2,309 +2,309 @@ "name": "South-Central Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Morehead-Upper Maro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nambu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Namo", "iso_1_code": null, "iso_3_code": "mxw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9535", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nambo", "iso_1_code": null, "iso_3_code": "ncm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9536", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Neme", "iso_1_code": null, "iso_3_code": "nex", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9537", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Namat", "iso_1_code": null, "iso_3_code": "nkm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9538", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nama", "iso_1_code": null, "iso_3_code": "nmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nen", "iso_1_code": null, "iso_3_code": "nqn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9534", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tonda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Blafe", "iso_1_code": null, "iso_3_code": "bfh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9542", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rema", "iso_1_code": null, "iso_3_code": "bow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wartha Thuntai", "iso_1_code": null, "iso_3_code": "gnt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9544", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanum, Ngk\u00e2lmpw", "iso_1_code": null, "iso_3_code": "kcd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanum, B\u00e4di", "iso_1_code": null, "iso_3_code": "khd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9546", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanum, Sota", "iso_1_code": null, "iso_3_code": "krz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanum, Sm\u00e4rky", "iso_1_code": null, "iso_3_code": "kxq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9548", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanja", "iso_1_code": null, "iso_3_code": "pep", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aramba", "iso_1_code": null, "iso_3_code": "stk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9550", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "W\u00e1ra", "iso_1_code": null, "iso_3_code": "tci", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9551", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yey", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yei", "iso_1_code": null, "iso_3_code": "jei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9553", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pahoturi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Idi", "iso_1_code": null, "iso_3_code": "idi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Agob", "iso_1_code": null, "iso_3_code": "kit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9556", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tabo", "iso_1_code": null, "iso_3_code": "knv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9558", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yelmek-Maklew", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yelmek", "iso_1_code": null, "iso_3_code": "jel", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9560", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maklew", "iso_1_code": null, "iso_3_code": "mgf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9559", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git "a/data/S\303\241livan.json" "b/data/S\303\241livan.json" index 2027a376d02a59748edbe2dd26f50cbe59a57564..297571a210df6196d1a85624ae3c63a5773550f4 100644 --- "a/data/S\303\241livan.json" +++ "b/data/S\303\241livan.json" @@ -2,51 +2,51 @@ "name": "S\u00e1livan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "S\u00e1liba", "iso_1_code": null, "iso_3_code": "slc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piaroa-Maco", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Piaroa", "iso_1_code": null, "iso_3_code": "pid", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9577", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maco", "iso_1_code": null, "iso_3_code": "wpc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9578", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9576", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9574", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Tacanan.json b/data/Tacanan.json index 79799ef410df9316106252524ebd9fd9609f7071..4c5b68684ce4caf755a0d1d7bf1f0f8e1175a383 100644 --- a/data/Tacanan.json +++ b/data/Tacanan.json @@ -2,98 +2,98 @@ "name": "Tacanan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cavine\u00f1a", "iso_1_code": null, "iso_3_code": "cav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9580", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ese Ejja", "iso_1_code": null, "iso_3_code": "ese", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9582", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Toromono", "iso_1_code": null, "iso_3_code": "tno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9583", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tacana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Araona", "iso_1_code": null, "iso_3_code": "aro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9585", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Reyesano", "iso_1_code": null, "iso_3_code": "rey", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9586", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tacana", "iso_1_code": null, "iso_3_code": "tna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9587", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9584", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Takelman.json b/data/Takelman.json index a817b5f0446976176e28236e8e30e1ee9478182e..b9a17d4f676378e6ed4b124850071c8c3e1a551b 100644 --- a/data/Takelman.json +++ b/data/Takelman.json @@ -2,40 +2,40 @@ "name": "Takelman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kalapuya", "iso_1_code": null, "iso_3_code": "kyl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9589", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Kalapuya", "iso_1_code": null, "iso_3_code": "nrt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Kalapuya", "iso_1_code": null, "iso_3_code": "sxk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Tarascan.json b/data/Tarascan.json index f39a321ae5f936b7d0b14d82e1cc1a0911e20f40..1d357806e048fabf1a063ae478d5e3b8e0d934aa 100644 --- a/data/Tarascan.json +++ b/data/Tarascan.json @@ -2,34 +2,34 @@ "name": "Tarascan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Purepecha, Western Highland", "iso_1_code": null, "iso_3_code": "pua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9593", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Purepecha", "iso_1_code": null, "iso_3_code": "tsz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9594", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Tequistlatecan.json b/data/Tequistlatecan.json index 38f70e1945ea8c412fa3710e73f52f08dd4b4b35..e61244478a994dfbc4d24dfe16560e830ee9310d 100644 --- a/data/Tequistlatecan.json +++ b/data/Tequistlatecan.json @@ -2,32 +2,32 @@ "name": "Tequistlatecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chontal, Highland Oaxaca", "iso_1_code": null, "iso_3_code": "chd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9596", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chontal, Lowland Oaxaca", "iso_1_code": null, "iso_3_code": "clo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9597", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9595", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Tiniguan.json b/data/Tiniguan.json index 1e5c144619d5aacc6cfa2b24c747ce21aff01bf0..2867ef1f5748ec322af1023e242ff926522a62e8 100644 --- a/data/Tiniguan.json +++ b/data/Tiniguan.json @@ -2,20 +2,20 @@ "name": "Tiniguan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tinigua", "iso_1_code": null, "iso_3_code": "tit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Tor-Kwerba.json b/data/Tor-Kwerba.json index c491e6dc1acf78bd585d70c16d3490034b1714fb..30fee78cd50a97cf229dc7b55b32700e14894b97 100644 --- a/data/Tor-Kwerba.json +++ b/data/Tor-Kwerba.json @@ -2,343 +2,343 @@ "name": "Tor-Kwerba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Greater Kwerba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Isirawa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Isirawa", "iso_1_code": null, "iso_3_code": "srl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9602", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwerba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bagusa", "iso_1_code": null, "iso_3_code": "bqb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwerba", "iso_1_code": null, "iso_3_code": "kwe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Trimuris", "iso_1_code": null, "iso_3_code": "tip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9608", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kauwera", "iso_1_code": null, "iso_3_code": "xau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwerba Mamberamo", "iso_1_code": null, "iso_3_code": "xwr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9610", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9605", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Coast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Airoran", "iso_1_code": null, "iso_3_code": "air", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samarokena", "iso_1_code": null, "iso_3_code": "tmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9613", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9611", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9601", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Orya-Tor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Orya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Orya", "iso_1_code": null, "iso_3_code": "ury", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9616", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sause", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sause", "iso_1_code": null, "iso_3_code": "sao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9617", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Betaf", "iso_1_code": null, "iso_3_code": "bfe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Berik", "iso_1_code": null, "iso_3_code": "bkl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9621", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Beneraf", "iso_1_code": null, "iso_3_code": "bnv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9622", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dabe", "iso_1_code": null, "iso_3_code": "dbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9623", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itik", "iso_1_code": null, "iso_3_code": "itx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9624", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jofotek-Bromnya", "iso_1_code": null, "iso_3_code": "jbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Keijar", "iso_1_code": null, "iso_3_code": "kdy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwinsu", "iso_1_code": null, "iso_3_code": "kuc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9627", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwesten", "iso_1_code": null, "iso_3_code": "kwt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mander", "iso_1_code": null, "iso_3_code": "mqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9629", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dineor", "iso_1_code": null, "iso_3_code": "mrx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vitou", "iso_1_code": null, "iso_3_code": "vto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wares", "iso_1_code": null, "iso_3_code": "wai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9614", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Torricelli.json b/data/Torricelli.json index 318a9a2f86481a8d16afde4ed3d906bfd330ca19..a5b01cae65552f553824107779d711a70b863cd5 100644 --- a/data/Torricelli.json +++ b/data/Torricelli.json @@ -2,783 +2,783 @@ "name": "Torricelli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kombio-Arapesh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arapesh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abu\u2019", "iso_1_code": null, "iso_3_code": "aah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mufian", "iso_1_code": null, "iso_3_code": "aoj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9637", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Weri", "iso_1_code": null, "iso_3_code": "aon", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9638", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bukiyip", "iso_1_code": null, "iso_3_code": "ape", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9639", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kombio", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aruek", "iso_1_code": null, "iso_3_code": "aur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eitiep", "iso_1_code": null, "iso_3_code": "eit", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9642", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aro", "iso_1_code": null, "iso_3_code": "tei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wom", "iso_1_code": null, "iso_3_code": "wmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kombio", "iso_1_code": null, "iso_3_code": "xbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9645", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yambes", "iso_1_code": null, "iso_3_code": "ymb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9646", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maimai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Beli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Beli", "iso_1_code": null, "iso_3_code": "bey", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laeko-Libuat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Laeko-Libuat", "iso_1_code": null, "iso_3_code": "lkl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9650", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maimai Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Heiyoho", "iso_1_code": null, "iso_3_code": "auk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siliput", "iso_1_code": null, "iso_3_code": "mkc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yahang", "iso_1_code": null, "iso_3_code": "rhp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9652", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wiaki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Minidien", "iso_1_code": null, "iso_3_code": "wii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9656", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marienberg", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bungain", "iso_1_code": null, "iso_3_code": "but", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buna", "iso_1_code": null, "iso_3_code": "bvn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Elepi", "iso_1_code": null, "iso_3_code": "ele", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamasau", "iso_1_code": null, "iso_3_code": "kms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9662", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Juwar", "iso_1_code": null, "iso_3_code": "mwb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wiarumus", "iso_1_code": null, "iso_3_code": "tua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Urimo", "iso_1_code": null, "iso_3_code": "urx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Monumbo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lilau", "iso_1_code": null, "iso_3_code": "lll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Monumbo", "iso_1_code": null, "iso_3_code": "mxk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Urim", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Urim", "iso_1_code": null, "iso_3_code": "uri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9670", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wapei-Palei", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Palei", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ambrak", "iso_1_code": null, "iso_3_code": "aag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Agi", "iso_1_code": null, "iso_3_code": "aif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mol", "iso_1_code": null, "iso_3_code": "alx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bragat", "iso_1_code": null, "iso_3_code": "aof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aruop", "iso_1_code": null, "iso_3_code": "lsr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nabi", "iso_1_code": null, "iso_3_code": "mty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wanap", "iso_1_code": null, "iso_3_code": "wnp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yangum Dey", "iso_1_code": null, "iso_3_code": "yde", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yangum Gel", "iso_1_code": null, "iso_3_code": "ygl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yangum Mon", "iso_1_code": null, "iso_3_code": "ymo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9682", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Urat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Urat", "iso_1_code": null, "iso_3_code": "urt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9684", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9683", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wapei", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Au", "iso_1_code": null, "iso_3_code": "avt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9686", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dia", "iso_1_code": null, "iso_3_code": "dia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9687", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Elkei", "iso_1_code": null, "iso_3_code": "elk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9688", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gnau", "iso_1_code": null, "iso_3_code": "gnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ningil", "iso_1_code": null, "iso_3_code": "niz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9690", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Olo", "iso_1_code": null, "iso_3_code": "ong", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9691", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sinagen", "iso_1_code": null, "iso_3_code": "siu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Walman", "iso_1_code": null, "iso_3_code": "van", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yeri", "iso_1_code": null, "iso_3_code": "yev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yis", "iso_1_code": null, "iso_3_code": "yis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yil", "iso_1_code": null, "iso_3_code": "yll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yau", "iso_1_code": null, "iso_3_code": "yyu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Wapei", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Seti", "iso_1_code": null, "iso_3_code": "sbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seta", "iso_1_code": null, "iso_3_code": "stf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "One", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "One, Molmo", "iso_1_code": null, "iso_3_code": "aun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9702", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "One, Inebu", "iso_1_code": null, "iso_3_code": "oin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "One, Kwamtim", "iso_1_code": null, "iso_3_code": "okk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "One, Kabore", "iso_1_code": null, "iso_3_code": "onk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "One, Northern", "iso_1_code": null, "iso_3_code": "onr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "One, Southern", "iso_1_code": null, "iso_3_code": "osu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9633", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Totonacan.json b/data/Totonacan.json index 362302a8a90e8903c6f424a30f182a625e2bb922..e1b7bac0cba4c5242390d34737b893f261fca329 100644 --- a/data/Totonacan.json +++ b/data/Totonacan.json @@ -2,168 +2,168 @@ "name": "Totonacan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tepehua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tepehua, Huehuetla", "iso_1_code": null, "iso_3_code": "tee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9710", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tepehua, Pisaflores", "iso_1_code": null, "iso_3_code": "tpp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9711", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tepehua, Tlachichilco", "iso_1_code": null, "iso_3_code": "tpt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9712", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Totonac", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Totonac, Tecpatl\u00e1n", "iso_1_code": null, "iso_3_code": "tcw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9714", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Totonac, Upper Necaxa", "iso_1_code": null, "iso_3_code": "tku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9715", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Totonac, Yecuatla", "iso_1_code": null, "iso_3_code": "tlc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Totonac, Filomena Mata-Coahuitl\u00e1n", "iso_1_code": null, "iso_3_code": "tlp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Totonac, Coyutla", "iso_1_code": null, "iso_3_code": "toc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9718", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Totonac, Xicotepec de Ju\u00e1rez", "iso_1_code": null, "iso_3_code": "too", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9719", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Totonac, Papantla", "iso_1_code": null, "iso_3_code": "top", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9720", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Totonac, Highland", "iso_1_code": null, "iso_3_code": "tos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9721", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Totonaco del cerro Xinolat\u00e9petl", "iso_1_code": null, "iso_3_code": "tqt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9722", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9708", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Trans-New Guinea.json b/data/Trans-New Guinea.json index 66c84a2f49b0ec9405f736588f2fa45102d42ec0..5687a9cb940937fc517457fdb8b41a45ba77cda1 100644 --- a/data/Trans-New Guinea.json +++ b/data/Trans-New Guinea.json @@ -2,7162 +2,7162 @@ "name": "Trans-New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Angan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Angaatiha", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Angaataha", "iso_1_code": null, "iso_3_code": "agm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9726", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9725", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear Angan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ankave", "iso_1_code": null, "iso_3_code": "aak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9728", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tainae", "iso_1_code": null, "iso_3_code": "ago", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Safeyoka", "iso_1_code": null, "iso_3_code": "apz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9730", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yipma", "iso_1_code": null, "iso_3_code": "byr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9731", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Hamtai", "iso_1_code": null, "iso_3_code": "hmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kawacha", "iso_1_code": null, "iso_3_code": "kcb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamasa", "iso_1_code": null, "iso_3_code": "klp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Menya", "iso_1_code": null, "iso_3_code": "mcr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Akoye", "iso_1_code": null, "iso_3_code": "miw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9736", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Simbari", "iso_1_code": null, "iso_3_code": "smb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9737", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Susuami", "iso_1_code": null, "iso_3_code": "ssu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yagwoia", "iso_1_code": null, "iso_3_code": "ygw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9724", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Asmat-Kamoro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asmat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asmat, Casuarina Coast", "iso_1_code": null, "iso_3_code": "asc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Asmat, Yaosakor", "iso_1_code": null, "iso_3_code": "asy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9743", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Asmat, Central", "iso_1_code": null, "iso_3_code": "cns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9744", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Asmat, North", "iso_1_code": null, "iso_3_code": "nks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Citak, Tamnim", "iso_1_code": null, "iso_3_code": "tml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9746", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Citak", "iso_1_code": null, "iso_3_code": "txt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Diuwe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Diuwe", "iso_1_code": null, "iso_3_code": "diy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamoro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamoro", "iso_1_code": null, "iso_3_code": "kgq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sabakor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Buruwai", "iso_1_code": null, "iso_3_code": "asi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamberau", "iso_1_code": null, "iso_3_code": "irx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sempan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sempan", "iso_1_code": null, "iso_3_code": "xse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9755", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awin-Pare", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aekyom", "iso_1_code": null, "iso_3_code": "awi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9758", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pa", "iso_1_code": null, "iso_3_code": "ppt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9759", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bosavi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eibela", "iso_1_code": null, "iso_3_code": "ail", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9761", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaluli", "iso_1_code": null, "iso_3_code": "bco", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9762", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bedamuni", "iso_1_code": null, "iso_3_code": "beo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dibiyaso", "iso_1_code": null, "iso_3_code": "dby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Edolo", "iso_1_code": null, "iso_3_code": "etr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9765", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kasua", "iso_1_code": null, "iso_3_code": "khs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9766", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Onobasulu", "iso_1_code": null, "iso_3_code": "onn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9767", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sonia", "iso_1_code": null, "iso_3_code": "siq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9768", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Turumsa", "iso_1_code": null, "iso_3_code": "tqm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9760", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chimbu-Wahgi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chimbu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chuave", "iso_1_code": null, "iso_3_code": "cjv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9772", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dom", "iso_1_code": null, "iso_3_code": "doa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9773", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Golin", "iso_1_code": null, "iso_3_code": "gvf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9774", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kuman", "iso_1_code": null, "iso_3_code": "kue", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9775", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nomane", "iso_1_code": null, "iso_3_code": "nof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yui", "iso_1_code": null, "iso_3_code": "sll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9777", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sinasina", "iso_1_code": null, "iso_3_code": "sst", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9778", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9771", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hagen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaugel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Imbongu", "iso_1_code": null, "iso_3_code": "imo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9781", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bo-Ung", "iso_1_code": null, "iso_3_code": "mux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9782", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Umbu-Ungu", "iso_1_code": null, "iso_3_code": "ubu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9783", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9780", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Melpa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Melpa", "iso_1_code": null, "iso_3_code": "med", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9785", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9784", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9779", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jimi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kandawo", "iso_1_code": null, "iso_3_code": "gam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9787", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Maring", "iso_1_code": null, "iso_3_code": "mbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9788", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Narak", "iso_1_code": null, "iso_3_code": "nac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9786", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wahgi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nii", "iso_1_code": null, "iso_3_code": "nii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9791", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wahgi", "iso_1_code": null, "iso_3_code": "wgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9792", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yuwei", "iso_1_code": null, "iso_3_code": "whg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9793", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9770", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Damal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Damal", "iso_1_code": null, "iso_3_code": "uhn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9794", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dem", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dem", "iso_1_code": null, "iso_3_code": "dem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9797", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9796", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duna-Bogaya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bogaya", "iso_1_code": null, "iso_3_code": "boq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duna", "iso_1_code": null, "iso_3_code": "duc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9800", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Kutubu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fiwaga", "iso_1_code": null, "iso_3_code": "fiw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Foi", "iso_1_code": null, "iso_3_code": "foi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Strickland", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fembe", "iso_1_code": null, "iso_3_code": "agl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gebusi", "iso_1_code": null, "iso_3_code": "goi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9806", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kubo", "iso_1_code": null, "iso_3_code": "jko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9807", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Odoodee", "iso_1_code": null, "iso_3_code": "kkc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9808", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Konai", "iso_1_code": null, "iso_3_code": "kxw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9809", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Samo", "iso_1_code": null, "iso_3_code": "smq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9804", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eleman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nuclear Eleman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Toaripi", "iso_1_code": null, "iso_3_code": "tqo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9814", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tairuma", "iso_1_code": null, "iso_3_code": "uar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9813", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Opao", "iso_1_code": null, "iso_3_code": "opo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9817", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Orokolo", "iso_1_code": null, "iso_3_code": "oro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Keoru-Ahia", "iso_1_code": null, "iso_3_code": "xeu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9816", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Purari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Purari", "iso_1_code": null, "iso_3_code": "iar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tate", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaki Ae", "iso_1_code": null, "iso_3_code": "tbd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9822", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9811", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Engan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Angal-Kewa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Angal", "iso_1_code": null, "iso_3_code": "age", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Angal Heneng", "iso_1_code": null, "iso_3_code": "akh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9827", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Angal Enen", "iso_1_code": null, "iso_3_code": "aoe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kewapi, West", "iso_1_code": null, "iso_3_code": "kew", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9829", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kewapi, East", "iso_1_code": null, "iso_3_code": "kjs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9830", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pole", "iso_1_code": null, "iso_3_code": "kjy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samberigi", "iso_1_code": null, "iso_3_code": "ssx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9832", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9825", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Enga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bisorio", "iso_1_code": null, "iso_3_code": "bir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Enga", "iso_1_code": null, "iso_3_code": "enq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9835", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ipili", "iso_1_code": null, "iso_3_code": "ipi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9836", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kyaka", "iso_1_code": null, "iso_3_code": "kyc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9837", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lembena", "iso_1_code": null, "iso_3_code": "leq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9838", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nete", "iso_1_code": null, "iso_3_code": "net", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Huli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Huli", "iso_1_code": null, "iso_3_code": "hui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9841", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Finisterre-Huon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Finisterre", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Erap", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Finongan", "iso_1_code": null, "iso_3_code": "fag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nema", "iso_1_code": null, "iso_3_code": "gsn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Doloman", "iso_1_code": null, "iso_3_code": "mhf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mungkip", "iso_1_code": null, "iso_3_code": "mpv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nakame", "iso_1_code": null, "iso_3_code": "nib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nek", "iso_1_code": null, "iso_3_code": "nif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9850", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sama", "iso_1_code": null, "iso_3_code": "nis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuk", "iso_1_code": null, "iso_3_code": "noc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9852", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Numanggang", "iso_1_code": null, "iso_3_code": "nop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9853", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ma Manda", "iso_1_code": null, "iso_3_code": "skc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uri", "iso_1_code": null, "iso_3_code": "uvh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9855", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9844", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gusap-Mot", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Madi", "iso_1_code": null, "iso_3_code": "grg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9857", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iyo", "iso_1_code": null, "iso_3_code": "nca", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9858", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Neko", "iso_1_code": null, "iso_3_code": "nej", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9859", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nekgini", "iso_1_code": null, "iso_3_code": "nkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9860", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngaing", "iso_1_code": null, "iso_3_code": "nnf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9861", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rawa", "iso_1_code": null, "iso_3_code": "rwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9862", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ufim", "iso_1_code": null, "iso_3_code": "ufi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uruwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Weliki", "iso_1_code": null, "iso_3_code": "klh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9865", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nukna", "iso_1_code": null, "iso_3_code": "klt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9866", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kutong", "iso_1_code": null, "iso_3_code": "skm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tayatuk", "iso_1_code": null, "iso_3_code": "smc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yau", "iso_1_code": null, "iso_3_code": "yuw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9869", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wantoat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awara", "iso_1_code": null, "iso_3_code": "awx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9871", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tuma-Irumu", "iso_1_code": null, "iso_3_code": "iou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9872", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wantoat", "iso_1_code": null, "iso_3_code": "wnc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9873", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warup", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Muratayak", "iso_1_code": null, "iso_3_code": "asx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9875", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gamane", "iso_1_code": null, "iso_3_code": "bmp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gwahatike", "iso_1_code": null, "iso_3_code": "dah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9877", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Degenang", "iso_1_code": null, "iso_3_code": "dge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9878", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Forak", "iso_1_code": null, "iso_3_code": "frq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9879", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guya", "iso_1_code": null, "iso_3_code": "gka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9880", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Asaro\u2019o", "iso_1_code": null, "iso_3_code": "mtv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9881", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tand\u0268", "iso_1_code": null, "iso_3_code": "ygm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9882", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yupna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bonkiman", "iso_1_code": null, "iso_3_code": "bop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9884", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Domung", "iso_1_code": null, "iso_3_code": "dev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9885", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ma", "iso_1_code": null, "iso_3_code": "mjn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9886", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nankina", "iso_1_code": null, "iso_3_code": "nnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9887", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yout Wam", "iso_1_code": null, "iso_3_code": "ytw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9888", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yopno", "iso_1_code": null, "iso_3_code": "yut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9889", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9883", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Huon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dedua", "iso_1_code": null, "iso_3_code": "ded", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9892", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kube", "iso_1_code": null, "iso_3_code": "kgf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9893", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "K\u00e2te", "iso_1_code": null, "iso_3_code": "kmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9894", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Borong", "iso_1_code": null, "iso_3_code": "ksr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9895", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mape", "iso_1_code": null, "iso_3_code": "mlh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9896", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Migabac", "iso_1_code": null, "iso_3_code": "mpp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9897", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Momare", "iso_1_code": null, "iso_3_code": "msz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sene", "iso_1_code": null, "iso_3_code": "sej", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9891", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kovai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kovai", "iso_1_code": null, "iso_3_code": "kqb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9901", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Somba-Siawari", "iso_1_code": null, "iso_3_code": "bmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9903", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kinalakna", "iso_1_code": null, "iso_3_code": "kco", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9904", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Komba", "iso_1_code": null, "iso_3_code": "kpf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9905", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kumukio", "iso_1_code": null, "iso_3_code": "kuo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mesem", "iso_1_code": null, "iso_3_code": "mci", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nabak", "iso_1_code": null, "iso_3_code": "naf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9908", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nomu", "iso_1_code": null, "iso_3_code": "noh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9909", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ono", "iso_1_code": null, "iso_3_code": "ons", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9910", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sialum", "iso_1_code": null, "iso_3_code": "slw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9911", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Selepet", "iso_1_code": null, "iso_3_code": "spl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9912", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tobo", "iso_1_code": null, "iso_3_code": "tbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Timbe", "iso_1_code": null, "iso_3_code": "tim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9914", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9902", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9890", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gogodala-Suki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gogodala", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ari", "iso_1_code": null, "iso_3_code": "aac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gogodala", "iso_1_code": null, "iso_3_code": "ggw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waruna", "iso_1_code": null, "iso_3_code": "wrv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Suki", "iso_1_code": null, "iso_3_code": "sui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Greater Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "North Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Suena", "iso_1_code": null, "iso_3_code": "sue", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9925", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zia", "iso_1_code": null, "iso_3_code": "zia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9926", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9924", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Binandere", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Binandere", "iso_1_code": null, "iso_3_code": "bhg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9929", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9928", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Coastal Binanderean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baruga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baruga", "iso_1_code": null, "iso_3_code": "bjz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9933", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Doghoro", "iso_1_code": null, "iso_3_code": "dgx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9934", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9932", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gaena-Korafe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gaina", "iso_1_code": null, "iso_3_code": "gcn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9936", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korafe-Yegha", "iso_1_code": null, "iso_3_code": "kpr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9937", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9935", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Notu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ewage-Notu", "iso_1_code": null, "iso_3_code": "nou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9939", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9938", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Orokaivan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aeka", "iso_1_code": null, "iso_3_code": "aez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hunjara-Kaina Ke", "iso_1_code": null, "iso_3_code": "hkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Orokaiva", "iso_1_code": null, "iso_3_code": "okv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9943", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9930", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yekora", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yekora", "iso_1_code": null, "iso_3_code": "ykr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9945", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guhu-Samane", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Guhu-Samane", "iso_1_code": null, "iso_3_code": "ghs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9947", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9946", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inland Gulf", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ipiko", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ipiko", "iso_1_code": null, "iso_3_code": "ipo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9950", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minanibai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Foia Foia", "iso_1_code": null, "iso_3_code": "ffi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9952", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hoia Hoia", "iso_1_code": null, "iso_3_code": "hhi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9953", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hoyahoya", "iso_1_code": null, "iso_3_code": "hhy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9954", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minanibai", "iso_1_code": null, "iso_3_code": "mcv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9955", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mubami", "iso_1_code": null, "iso_3_code": "tsx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9956", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karami", "iso_1_code": null, "iso_3_code": "xar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9957", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9951", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kainantu-Goroka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gorokan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fore", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fore", "iso_1_code": null, "iso_3_code": "for", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9961", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gimi", "iso_1_code": null, "iso_3_code": "gim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9962", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gahuku-Benabena", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dano", "iso_1_code": null, "iso_3_code": "aso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9964", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Benabena", "iso_1_code": null, "iso_3_code": "bef", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9965", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Alekano", "iso_1_code": null, "iso_3_code": "gah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9966", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tokano", "iso_1_code": null, "iso_3_code": "zuh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9967", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9963", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gende", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gende", "iso_1_code": null, "iso_3_code": "gaf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9968", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Isabi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Isabi", "iso_1_code": null, "iso_3_code": "isa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9971", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamano-Yagaria", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wagama", "iso_1_code": null, "iso_3_code": "abg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9973", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inoke-Yate", "iso_1_code": null, "iso_3_code": "ino", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9974", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kamano", "iso_1_code": null, "iso_3_code": "kbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9975", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kanite", "iso_1_code": null, "iso_3_code": "kmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9976", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Keyagana", "iso_1_code": null, "iso_3_code": "kyg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9977", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yagaria", "iso_1_code": null, "iso_3_code": "ygr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9978", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9972", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siane", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Siane", "iso_1_code": null, "iso_3_code": "snp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9980", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yaweyuha", "iso_1_code": null, "iso_3_code": "yby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9981", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9979", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kainantu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aziana", "iso_1_code": null, "iso_3_code": "gat", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9983", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gadsup-Auyana-Awa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Agarabi", "iso_1_code": null, "iso_3_code": "agd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9985", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Awiyaana", "iso_1_code": null, "iso_3_code": "auy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9986", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Awa", "iso_1_code": null, "iso_3_code": "awb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9987", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gadsup", "iso_1_code": null, "iso_3_code": "gaj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9988", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kosena", "iso_1_code": null, "iso_3_code": "kze", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9989", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ontenu", "iso_1_code": null, "iso_3_code": "ont", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Usarufa", "iso_1_code": null, "iso_3_code": "usa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9991", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kambaira", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asa\u2019a", "iso_1_code": null, "iso_3_code": "kyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9993", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Owenia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Owenia", "iso_1_code": null, "iso_3_code": "wsr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9995", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tairora", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Binumarien", "iso_1_code": null, "iso_3_code": "bjr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9997", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tairora, South", "iso_1_code": null, "iso_3_code": "omw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9998", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tairora, North", "iso_1_code": null, "iso_3_code": "tbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "9999", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Waffa", "iso_1_code": null, "iso_3_code": "waj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10000", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "9996", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9982", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9958", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamula", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamula", "iso_1_code": null, "iso_3_code": "xla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10002", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10001", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayagar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Atohwaim", "iso_1_code": null, "iso_3_code": "aqm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayagar", "iso_1_code": null, "iso_3_code": "kyt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10005", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamagario", "iso_1_code": null, "iso_3_code": "tcg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10006", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10003", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiwaian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bamu", "iso_1_code": null, "iso_3_code": "bcf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10008", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiwai, Northeast", "iso_1_code": null, "iso_3_code": "kiw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiwai, Southern", "iso_1_code": null, "iso_3_code": "kjd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waboda", "iso_1_code": null, "iso_3_code": "kmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10011", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kerewo", "iso_1_code": null, "iso_3_code": "kxz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10012", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Morigi", "iso_1_code": null, "iso_3_code": "mdb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10013", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kibiri", "iso_1_code": null, "iso_3_code": "prm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10014", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10007", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kolopom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kimaghima", "iso_1_code": null, "iso_3_code": "kig", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10016", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ndom", "iso_1_code": null, "iso_3_code": "nqm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Riantana", "iso_1_code": null, "iso_3_code": "ran", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Madang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Croisilles", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amaimon", "iso_1_code": null, "iso_3_code": "ali", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10021", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kare", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kare", "iso_1_code": null, "iso_3_code": "kmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10023", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10022", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kokon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Girawa", "iso_1_code": null, "iso_3_code": "bbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10025", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kein", "iso_1_code": null, "iso_3_code": "bmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10026", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Munit", "iso_1_code": null, "iso_3_code": "mtc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10027", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10024", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kowan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amako", "iso_1_code": null, "iso_3_code": "koz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10029", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waskia", "iso_1_code": null, "iso_3_code": "wsk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10030", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10028", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mabuso", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amele", "iso_1_code": null, "iso_3_code": "aey", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10033", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Fulumu", "iso_1_code": null, "iso_3_code": "bbd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10034", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gumalu", "iso_1_code": null, "iso_3_code": "gmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sihan", "iso_1_code": null, "iso_3_code": "snr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Panim-Isebe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Isebe", "iso_1_code": null, "iso_3_code": "igo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Panim", "iso_1_code": null, "iso_3_code": "pnr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10037", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hanseman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baimak", "iso_1_code": null, "iso_3_code": "bmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10041", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bagupi", "iso_1_code": null, "iso_3_code": "bpi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10042", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wagi", "iso_1_code": null, "iso_3_code": "fad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10043", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gal", "iso_1_code": null, "iso_3_code": "gap", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10044", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nobonob", "iso_1_code": null, "iso_3_code": "gaw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10045", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Garus", "iso_1_code": null, "iso_3_code": "gyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mawan", "iso_1_code": null, "iso_3_code": "mcz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10047", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matepi", "iso_1_code": null, "iso_3_code": "mqe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10048", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nake", "iso_1_code": null, "iso_3_code": "nbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10049", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rempi", "iso_1_code": null, "iso_3_code": "rmp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rapting", "iso_1_code": null, "iso_3_code": "rpt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10051", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saruga", "iso_1_code": null, "iso_3_code": "sra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10052", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yoidik", "iso_1_code": null, "iso_3_code": "ydk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Silopi-Utu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Utu", "iso_1_code": null, "iso_3_code": "utu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Silopi", "iso_1_code": null, "iso_3_code": "xsp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10056", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wamas-Samosa-Murupi-Mosimo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mosimo", "iso_1_code": null, "iso_3_code": "mqv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10058", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murupi", "iso_1_code": null, "iso_3_code": "mqw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10059", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samosa", "iso_1_code": null, "iso_3_code": "swm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wamas", "iso_1_code": null, "iso_3_code": "wmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10061", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10057", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10040", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mugil", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bargam", "iso_1_code": null, "iso_3_code": "mlp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10063", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10062", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Adelbert", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gavak", "iso_1_code": null, "iso_3_code": "dmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10065", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaukombar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mala", "iso_1_code": null, "iso_3_code": "ped", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10067", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miani", "iso_1_code": null, "iso_3_code": "pla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10068", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maia", "iso_1_code": null, "iso_3_code": "sks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10069", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maiani", "iso_1_code": null, "iso_3_code": "tnh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10070", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10066", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kumil-Tibor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kumil", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bepour", "iso_1_code": null, "iso_3_code": "bie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10073", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mauwake", "iso_1_code": null, "iso_3_code": "mhl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10074", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Moere", "iso_1_code": null, "iso_3_code": "mvq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10075", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10072", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tibor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pamosu", "iso_1_code": null, "iso_3_code": "hih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10077", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mawak", "iso_1_code": null, "iso_3_code": "mjj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10078", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hember Avu", "iso_1_code": null, "iso_3_code": "mmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mokati", "iso_1_code": null, "iso_3_code": "wnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10080", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kowaki", "iso_1_code": null, "iso_3_code": "xow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10081", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10076", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10071", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manep-Barem", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Brem", "iso_1_code": null, "iso_3_code": "buq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10083", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manep", "iso_1_code": null, "iso_3_code": "mkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10082", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Numugen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karian-Usan-Yaban", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karian", "iso_1_code": null, "iso_3_code": "bql", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10087", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Usan", "iso_1_code": null, "iso_3_code": "wnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10088", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yaben", "iso_1_code": null, "iso_3_code": "ybm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10089", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10086", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yarawata-Parawen-Ukuriguma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Parawen", "iso_1_code": null, "iso_3_code": "prw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10091", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ukuriguma", "iso_1_code": null, "iso_3_code": "ukg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10092", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yarawata", "iso_1_code": null, "iso_3_code": "yrw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10093", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10085", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10064", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Omosan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pal", "iso_1_code": null, "iso_3_code": "abw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10095", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kovol", "iso_1_code": null, "iso_3_code": "kgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalam-Kobon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kalam", "iso_1_code": null, "iso_3_code": "kmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10098", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kobon", "iso_1_code": null, "iso_3_code": "kpw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10099", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tay", "iso_1_code": null, "iso_3_code": "taw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10100", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10097", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rai Coast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wasembo", "iso_1_code": null, "iso_3_code": "gsp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10102", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Biyom-Tauya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Biyom", "iso_1_code": null, "iso_3_code": "bpm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tauya", "iso_1_code": null, "iso_3_code": "tya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10103", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Evapia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kou", "iso_1_code": null, "iso_3_code": "snz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wia", "iso_1_code": null, "iso_3_code": "ssj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10108", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Watiwa", "iso_1_code": null, "iso_3_code": "wtf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10109", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koromu", "iso_1_code": null, "iso_3_code": "xes", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kabenau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Taupi", "iso_1_code": null, "iso_3_code": "awm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10112", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Migum", "iso_1_code": null, "iso_3_code": "klm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10113", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lemio", "iso_1_code": null, "iso_3_code": "lei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pulabu", "iso_1_code": null, "iso_3_code": "pup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10115", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siroi", "iso_1_code": null, "iso_3_code": "ssd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10116", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10111", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mindjim", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anjam", "iso_1_code": null, "iso_3_code": "boj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10118", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bongu", "iso_1_code": null, "iso_3_code": "bpu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Soq", "iso_1_code": null, "iso_3_code": "mdc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sam", "iso_1_code": null, "iso_3_code": "snx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Uyajitaya", "iso_1_code": null, "iso_3_code": "duk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ogea", "iso_1_code": null, "iso_3_code": "eri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10124", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Jilim", "iso_1_code": null, "iso_3_code": "jil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10125", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waube", "iso_1_code": null, "iso_3_code": "kop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rerau", "iso_1_code": null, "iso_3_code": "rea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uya", "iso_1_code": null, "iso_3_code": "usu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dubuporo", "iso_1_code": null, "iso_3_code": "ynl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Peka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Danaru", "iso_1_code": null, "iso_3_code": "dnr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sumau", "iso_1_code": null, "iso_3_code": "six", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kobuka", "iso_1_code": null, "iso_3_code": "urg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10133", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sob", "iso_1_code": null, "iso_3_code": "urw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10134", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaganon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dumun", "iso_1_code": null, "iso_3_code": "dui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10136", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ganglau", "iso_1_code": null, "iso_3_code": "ggl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saep", "iso_1_code": null, "iso_3_code": "spd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yabong", "iso_1_code": null, "iso_3_code": "ybo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10139", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Adelbert", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Josephstaal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Osum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Utarmbung", "iso_1_code": null, "iso_3_code": "omo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10143", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pomoikan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anamuxra", "iso_1_code": null, "iso_3_code": "imi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10145", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moresada", "iso_1_code": null, "iso_3_code": "msx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Anam", "iso_1_code": null, "iso_3_code": "pda", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10147", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10144", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wadaginam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wadaginam", "iso_1_code": null, "iso_3_code": "wdg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10149", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10141", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sogeram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central Sogeram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Apal", "iso_1_code": null, "iso_3_code": "ena", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10152", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Magiyi", "iso_1_code": null, "iso_3_code": "gmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10153", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manat", "iso_1_code": null, "iso_3_code": "pmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10154", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Central Sogeram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mum", "iso_1_code": null, "iso_3_code": "kqa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10156", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sirva", "iso_1_code": null, "iso_3_code": "sbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Sogeram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kursav", "iso_1_code": null, "iso_3_code": "faj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gants", "iso_1_code": null, "iso_3_code": "gao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10160", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mag\u0268", "iso_1_code": null, "iso_3_code": "gkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10161", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aisi", "iso_1_code": null, "iso_3_code": "mmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Sogeram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nend", "iso_1_code": null, "iso_3_code": "anh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10164", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mand", "iso_1_code": null, "iso_3_code": "ate", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10150", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10019", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marind", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boazi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kuni-Boazi", "iso_1_code": null, "iso_3_code": "kvg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10168", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zimakani", "iso_1_code": null, "iso_3_code": "zik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10167", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear Marind", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Marind, Bian", "iso_1_code": null, "iso_3_code": "bpv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marind", "iso_1_code": null, "iso_3_code": "mrz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10170", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaqay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Warkay-Bipim", "iso_1_code": null, "iso_3_code": "bgv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaqay", "iso_1_code": null, "iso_3_code": "jaq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10175", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10173", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10166", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mek", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lik", "iso_1_code": null, "iso_3_code": "eip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yale, Kosarek", "iso_1_code": null, "iso_3_code": "kkl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10179", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Korupun-Sela", "iso_1_code": null, "iso_3_code": "kpq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10180", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Una", "iso_1_code": null, "iso_3_code": "mtg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10181", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nalca", "iso_1_code": null, "iso_3_code": "nlc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10182", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nipsan", "iso_1_code": null, "iso_3_code": "nps", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ketengban", "iso_1_code": null, "iso_3_code": "xte", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10176", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mombum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Koneraw", "iso_1_code": null, "iso_3_code": "kdw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mombum", "iso_1_code": null, "iso_3_code": "mso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10186", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mor", "iso_1_code": null, "iso_3_code": "moq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moraori", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Morori", "iso_1_code": null, "iso_3_code": "mok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ok-Awyu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awyu-Dumut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awyu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aghu", "iso_1_code": null, "iso_3_code": "ahh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10196", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awyu, South", "iso_1_code": null, "iso_3_code": "aws", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awyu, Central", "iso_1_code": null, "iso_3_code": "awu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awyu, Jair", "iso_1_code": null, "iso_3_code": "awv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awyu, Edera", "iso_1_code": null, "iso_3_code": "awy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awyu, Asue", "iso_1_code": null, "iso_3_code": "psa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awyu, North", "iso_1_code": null, "iso_3_code": "yir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10202", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dumut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mandobo Atas", "iso_1_code": null, "iso_3_code": "aax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandobo Bawah", "iso_1_code": null, "iso_3_code": "bwp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ketum", "iso_1_code": null, "iso_3_code": "ktt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kombai", "iso_1_code": null, "iso_3_code": "tyn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wambon", "iso_1_code": null, "iso_3_code": "wms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wanggom", "iso_1_code": null, "iso_3_code": "wng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10203", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korowai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Korowai", "iso_1_code": null, "iso_3_code": "khe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sawi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sawi", "iso_1_code": null, "iso_3_code": "saw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10213", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ok", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lowland", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Morop", "iso_1_code": null, "iso_3_code": "iwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10216", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muyu, North", "iso_1_code": null, "iso_3_code": "kti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10217", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muyu, South", "iso_1_code": null, "iso_3_code": "kts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ninggerum", "iso_1_code": null, "iso_3_code": "nxr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10219", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yongkom", "iso_1_code": null, "iso_3_code": "yon", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10220", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mountain", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bimin", "iso_1_code": null, "iso_3_code": "bhl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10222", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Faiwol", "iso_1_code": null, "iso_3_code": "fai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10223", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mian", "iso_1_code": null, "iso_3_code": "mpt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10224", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nakai", "iso_1_code": null, "iso_3_code": "nkj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Setaman", "iso_1_code": null, "iso_3_code": "stm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suganga", "iso_1_code": null, "iso_3_code": "sug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tifal", "iso_1_code": null, "iso_3_code": "tif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10228", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Telefol", "iso_1_code": null, "iso_3_code": "tlf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10229", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Urapmin", "iso_1_code": null, "iso_3_code": "urm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10221", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngalum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tsaukambo", "iso_1_code": null, "iso_3_code": "kvz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10232", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Komyandaret", "iso_1_code": null, "iso_3_code": "kzv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10233", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngalum", "iso_1_code": null, "iso_3_code": "szb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10234", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tangko", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tangko", "iso_1_code": null, "iso_3_code": "tkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Burumakok", "iso_1_code": null, "iso_3_code": "aip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10238", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwer", "iso_1_code": null, "iso_3_code": "kwr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kopkaka", "iso_1_code": null, "iso_3_code": "opk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10193", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oksapmin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oksapmin", "iso_1_code": null, "iso_3_code": "opm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10242", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10241", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pawaian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pawaia", "iso_1_code": null, "iso_3_code": "pwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Bird\u2019s Head", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Inanwatan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Duriankere", "iso_1_code": null, "iso_3_code": "dbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suabo", "iso_1_code": null, "iso_3_code": "szp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konda-Yahadian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Konda", "iso_1_code": null, "iso_3_code": "knd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yahadian", "iso_1_code": null, "iso_3_code": "ner", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Bird\u2019s Head Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kokoda", "iso_1_code": null, "iso_3_code": "xod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kemberano", "iso_1_code": null, "iso_3_code": "bzp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arandai", "iso_1_code": null, "iso_3_code": "jbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10255", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kais", "iso_1_code": null, "iso_3_code": "kzm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10259", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puragi", "iso_1_code": null, "iso_3_code": "pru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaburi", "iso_1_code": null, "iso_3_code": "uka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10252", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeast Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dagan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Daga", "iso_1_code": null, "iso_3_code": "dgz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10264", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Umanakaina", "iso_1_code": null, "iso_3_code": "gdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10265", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ginuman", "iso_1_code": null, "iso_3_code": "gnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dima", "iso_1_code": null, "iso_3_code": "jma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mapena", "iso_1_code": null, "iso_3_code": "mnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maiwa", "iso_1_code": null, "iso_3_code": "mti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10269", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Onjob", "iso_1_code": null, "iso_3_code": "onj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanasi", "iso_1_code": null, "iso_3_code": "soq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10271", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Turaka", "iso_1_code": null, "iso_3_code": "trh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10263", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Goilalan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fuyug", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fuyug", "iso_1_code": null, "iso_3_code": "fuy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10275", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kunimaipa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Biangai", "iso_1_code": null, "iso_3_code": "big", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10277", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kunimaipa", "iso_1_code": null, "iso_3_code": "kup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10278", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tauade", "iso_1_code": null, "iso_3_code": "ttd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10279", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amam", "iso_1_code": null, "iso_3_code": "wer", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10280", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10273", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koiarian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baraic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "\u00d6mie", "iso_1_code": null, "iso_3_code": "aom", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10283", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Barai", "iso_1_code": null, "iso_3_code": "bbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10284", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ese", "iso_1_code": null, "iso_3_code": "mcq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10285", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Namiae", "iso_1_code": null, "iso_3_code": "nvm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10286", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koiaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Biage", "iso_1_code": null, "iso_3_code": "bdf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10288", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koiari", "iso_1_code": null, "iso_3_code": "kbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koiali, Mountain", "iso_1_code": null, "iso_3_code": "kpx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10290", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Koita", "iso_1_code": null, "iso_3_code": "kqi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10291", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10287", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwalean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Humene", "iso_1_code": null, "iso_3_code": "huf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uare", "iso_1_code": null, "iso_3_code": "ksj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10294", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mulaha", "iso_1_code": null, "iso_3_code": "mfw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mailuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bauwaki", "iso_1_code": null, "iso_3_code": "bwk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10297", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Binahari", "iso_1_code": null, "iso_3_code": "bxz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10298", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Domu", "iso_1_code": null, "iso_3_code": "dof", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10299", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laua", "iso_1_code": null, "iso_3_code": "luf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Magi", "iso_1_code": null, "iso_3_code": "mgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10301", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Morawa", "iso_1_code": null, "iso_3_code": "mze", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manubaran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Doromu-Koki", "iso_1_code": null, "iso_3_code": "kqc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10304", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Maria", "iso_1_code": null, "iso_3_code": "mds", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10303", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yareban", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aneme Wake", "iso_1_code": null, "iso_3_code": "aby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10307", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bariji", "iso_1_code": null, "iso_3_code": "bjc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10308", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moikodi", "iso_1_code": null, "iso_3_code": "mkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nawaru", "iso_1_code": null, "iso_3_code": "nwr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10310", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yareba", "iso_1_code": null, "iso_3_code": "yrb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10311", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanah Merah", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tanahmerah", "iso_1_code": null, "iso_3_code": "tcm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teberan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dadibi", "iso_1_code": null, "iso_3_code": "mps", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10315", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Folopa", "iso_1_code": null, "iso_3_code": "ppo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10316", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10314", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tirio", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abom", "iso_1_code": null, "iso_3_code": "aob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makayam", "iso_1_code": null, "iso_3_code": "aup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baramu", "iso_1_code": null, "iso_3_code": "bmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bitur", "iso_1_code": null, "iso_3_code": "mcc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiunum", "iso_1_code": null, "iso_3_code": "wei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10317", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Turama-Kikorian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kairi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rumu", "iso_1_code": null, "iso_3_code": "klq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaser", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barikewa", "iso_1_code": null, "iso_3_code": "jbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mouwase", "iso_1_code": null, "iso_3_code": "jmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ikobi", "iso_1_code": null, "iso_3_code": "meb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dani Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dani, Upper Grand Valley", "iso_1_code": null, "iso_3_code": "dna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dani, Lower Grand Valley", "iso_1_code": null, "iso_3_code": "dni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dani, Mid Grand Valley", "iso_1_code": null, "iso_3_code": "dnt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dani, Western", "iso_1_code": null, "iso_3_code": "dnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10336", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Hupla", "iso_1_code": null, "iso_3_code": "hap", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nggem", "iso_1_code": null, "iso_3_code": "nbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10338", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Walak", "iso_1_code": null, "iso_3_code": "wlw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngalik", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nduga", "iso_1_code": null, "iso_3_code": "ndx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yali, Ninia", "iso_1_code": null, "iso_3_code": "nlk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Silimo", "iso_1_code": null, "iso_3_code": "wul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yali, Pass Valley", "iso_1_code": null, "iso_3_code": "yac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10344", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yali, Angguruk", "iso_1_code": null, "iso_3_code": "yli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10345", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wano", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wano", "iso_1_code": null, "iso_3_code": "wno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Timor-Alor-Pantar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wersing", "iso_1_code": null, "iso_3_code": "kvw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oirata", "iso_1_code": null, "iso_3_code": "oia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alor-Pantar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abui", "iso_1_code": null, "iso_3_code": "abz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10353", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Adang", "iso_1_code": null, "iso_3_code": "adn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hamap", "iso_1_code": null, "iso_3_code": "hmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kabola", "iso_1_code": null, "iso_3_code": "klz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kafoa", "iso_1_code": null, "iso_3_code": "kpu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kui", "iso_1_code": null, "iso_3_code": "kvd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10358", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Klon", "iso_1_code": null, "iso_3_code": "kyo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10359", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamang", "iso_1_code": null, "iso_3_code": "woi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pantar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Blagar", "iso_1_code": null, "iso_3_code": "beu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaera", "iso_1_code": null, "iso_3_code": "jka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10363", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pantar, Western", "iso_1_code": null, "iso_3_code": "lev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nedebang", "iso_1_code": null, "iso_3_code": "nec", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Reta", "iso_1_code": null, "iso_3_code": "ret", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teiwa", "iso_1_code": null, "iso_3_code": "twe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10367", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tereweng", "iso_1_code": null, "iso_3_code": "twg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanglapui", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sawila", "iso_1_code": null, "iso_3_code": "swt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10370", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kula", "iso_1_code": null, "iso_3_code": "tpg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10369", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Timor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bunak", "iso_1_code": null, "iso_3_code": "bfn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10373", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fataluku", "iso_1_code": null, "iso_3_code": "ddg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10374", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Makalero", "iso_1_code": null, "iso_3_code": "mjb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10375", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makasae", "iso_1_code": null, "iso_3_code": "mkz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10376", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10372", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karas", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karas", "iso_1_code": null, "iso_3_code": "kgv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10379", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10378", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Bomberai Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baham", "iso_1_code": null, "iso_3_code": "bdw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iha", "iso_1_code": null, "iso_3_code": "ihp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10377", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wissel Lakes", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Auye", "iso_1_code": null, "iso_3_code": "auu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10384", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moi", "iso_1_code": null, "iso_3_code": "daz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10385", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ekari", "iso_1_code": null, "iso_3_code": "ekg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moni", "iso_1_code": null, "iso_3_code": "mnz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10387", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wolani", "iso_1_code": null, "iso_3_code": "wod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10383", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10330", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Kutubu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fasu", "iso_1_code": null, "iso_3_code": "faa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10390", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10389", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wiru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Witu", "iso_1_code": null, "iso_3_code": "wiu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10392", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "9723", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Tsimshian.json b/data/Tsimshian.json index 3cbd01a3be4bacb64b2158cf175bcfb72278eb77..44a75bd0a85c9f946ad22f268fff27aaee4c9b59 100644 --- a/data/Tsimshian.json +++ b/data/Tsimshian.json @@ -2,51 +2,51 @@ "name": "Tsimshian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tsimshian", "iso_1_code": null, "iso_3_code": "tsi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10394", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nass-Gitksan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gitxsan", "iso_1_code": null, "iso_3_code": "git", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10396", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nisga\u2019a", "iso_1_code": null, "iso_3_code": "ncg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10397", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10395", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10393", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Tucanoan.json b/data/Tucanoan.json index b29666e50fb0787377b4dc3a8f0f94b61a1b3462..5498716387250e76b8f22052084bc9889e7afd27 100644 --- a/data/Tucanoan.json +++ b/data/Tucanoan.json @@ -2,380 +2,380 @@ "name": "Tucanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Miriti", "iso_1_code": null, "iso_3_code": "mmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Tucanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cubeo", "iso_1_code": null, "iso_3_code": "cub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10401", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10400", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Tucanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arapaso", "iso_1_code": null, "iso_3_code": "arj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wanano", "iso_1_code": null, "iso_3_code": "gvc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10404", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Macuna", "iso_1_code": null, "iso_3_code": "myy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10405", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Piratapuyo", "iso_1_code": null, "iso_3_code": "pir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10406", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bar\u00e1-Tuyuka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Waimaha", "iso_1_code": null, "iso_3_code": "bao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10408", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Barasana-Eduria", "iso_1_code": null, "iso_3_code": "bsn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10409", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pokang\u00e1", "iso_1_code": null, "iso_3_code": "pok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuyuca", "iso_1_code": null, "iso_3_code": "tue", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10411", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Carapano", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Carapana", "iso_1_code": null, "iso_3_code": "cbc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10413", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tatuyo", "iso_1_code": null, "iso_3_code": "tav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10414", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10412", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Desano-Siriano", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Desano", "iso_1_code": null, "iso_3_code": "des", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10416", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Siriano", "iso_1_code": null, "iso_3_code": "sri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10417", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10415", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tucano", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tucano", "iso_1_code": null, "iso_3_code": "tuo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10419", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wajiara", "iso_1_code": null, "iso_3_code": "yui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Tucanoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Koreguaje", "iso_1_code": null, "iso_3_code": "coe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10422", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Maijuna", "iso_1_code": null, "iso_3_code": "ore", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetete", "iso_1_code": null, "iso_3_code": "teb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tama", "iso_1_code": null, "iso_3_code": "ten", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanimuca-Letuama", "iso_1_code": null, "iso_3_code": "tnc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10426", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yahuna", "iso_1_code": null, "iso_3_code": "ynu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Macaguaje", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Macaguaje", "iso_1_code": null, "iso_3_code": "mcl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paicoca", "iso_1_code": null, "iso_3_code": "sey", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10430", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Siona", "iso_1_code": null, "iso_3_code": "snn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10431", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10421", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Tungusic.json b/data/Tungusic.json index e3d1ba5f0d05583bf8cc22728be1ef1ea1539e55..ab0246c3200239aaabbbcbce526a0b336d1eb27d 100644 --- a/data/Tungusic.json +++ b/data/Tungusic.json @@ -2,233 +2,233 @@ "name": "Tungusic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Even", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Even", "iso_1_code": null, "iso_3_code": "eve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10435", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Evenki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Evenki", "iso_1_code": null, "iso_3_code": "evn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oroqen", "iso_1_code": null, "iso_3_code": "orh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10438", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Negidal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Negidal", "iso_1_code": null, "iso_3_code": "neg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10439", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Southeast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nanaj", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nanai", "iso_1_code": null, "iso_3_code": "gld", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Orok", "iso_1_code": null, "iso_3_code": "oaa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10445", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ulch", "iso_1_code": null, "iso_3_code": "ulc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10446", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Udihe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oroch", "iso_1_code": null, "iso_3_code": "oac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10448", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Udihe", "iso_1_code": null, "iso_3_code": "ude", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jurchen", "iso_1_code": null, "iso_3_code": "juc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manchu", "iso_1_code": null, "iso_3_code": "mnc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xibe", "iso_1_code": null, "iso_3_code": "sjo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10453", + "native_tokenizers": [], "scripts": [ "Mong" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10450", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Tupian.json b/data/Tupian.json index 92955d5c7d64a8095fc31c7ad46fff67143a0644..312e15f6b78a86bcc32c973f6a4bd8cd9271d161 100644 --- a/data/Tupian.json +++ b/data/Tupian.json @@ -2,1087 +2,1087 @@ "name": "Tupian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awet\u00ed", "iso_1_code": null, "iso_3_code": "awe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamayur\u00e1", "iso_1_code": null, "iso_3_code": "kay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sater\u00e9-Maw\u00e9", "iso_1_code": null, "iso_3_code": "mav", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10457", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zo\u2019\u00e9", "iso_1_code": null, "iso_3_code": "pto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Purubor\u00e1", "iso_1_code": null, "iso_3_code": "pur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arikem", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arikem", "iso_1_code": null, "iso_3_code": "ait", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10461", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kariti\u00e2na", "iso_1_code": null, "iso_3_code": "ktn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Juruna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jur\u00fana", "iso_1_code": null, "iso_3_code": "jur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maritsau\u00e1", "iso_1_code": null, "iso_3_code": "msp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xipaya", "iso_1_code": null, "iso_3_code": "xiy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mond\u00e9", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mond\u00e9", "iso_1_code": null, "iso_3_code": "mnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suru\u00ed", "iso_1_code": null, "iso_3_code": "sru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10469", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aru\u00e1", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aru\u00e1", "iso_1_code": null, "iso_3_code": "arx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10471", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cinta Larga", "iso_1_code": null, "iso_3_code": "cin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10472", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gavi\u00e3o do Jiparan\u00e1", "iso_1_code": null, "iso_3_code": "gvo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10467", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munduruk\u00fa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kuru\u00e1ya", "iso_1_code": null, "iso_3_code": "kyr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munduruk\u00fa", "iso_1_code": null, "iso_3_code": "myu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10476", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10474", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ramarama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karo", "iso_1_code": null, "iso_3_code": "arr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10478", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Urumi", "iso_1_code": null, "iso_3_code": "uru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10477", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tupar\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akuntsu", "iso_1_code": null, "iso_3_code": "aqz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10481", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kepkiriw\u00e1t", "iso_1_code": null, "iso_3_code": "kpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makur\u00e1p", "iso_1_code": null, "iso_3_code": "mpu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10483", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sakirabi\u00e1", "iso_1_code": null, "iso_3_code": "skf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tupar\u00ed", "iso_1_code": null, "iso_3_code": "tpr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wayor\u00f3", "iso_1_code": null, "iso_3_code": "wyr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10486", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tup\u00ed-Guaran\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arawet\u00e9", "iso_1_code": null, "iso_3_code": "awt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guaran\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ach\u00e9", "iso_1_code": null, "iso_3_code": "guq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10490", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Xet\u00e1", "iso_1_code": null, "iso_3_code": "xet", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guaran\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Guaran\u00ed, Paraguayan", "iso_1_code": "gn", "iso_3_code": "gug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10493", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guaran\u00ed, Mby\u00e1", "iso_1_code": "gn", "iso_3_code": "gun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10494", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kaiw\u00e1", "iso_1_code": null, "iso_3_code": "kgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10495", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guaran\u00ed, Ava", "iso_1_code": "gn", "iso_3_code": "nhd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10496", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pai Tavytera", "iso_1_code": null, "iso_3_code": "pta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10497", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "\u00d1andeva", "iso_1_code": null, "iso_3_code": "tpj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10498", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bolivian Guaran\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Guaran\u00ed, Western Bolivian", "iso_1_code": "gn", "iso_3_code": "gnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10500", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guaran\u00ed, Eastern Bolivian", "iso_1_code": "gn", "iso_3_code": "gui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10501", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10499", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guaray\u00fa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Guarayu", "iso_1_code": null, "iso_3_code": "gyr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10503", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pauserna", "iso_1_code": null, "iso_3_code": "psm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10504", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sirion\u00f3", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jor\u00e1", "iso_1_code": null, "iso_3_code": "jor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sirion\u00f3", "iso_1_code": null, "iso_3_code": "srq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10507", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yuqui", "iso_1_code": null, "iso_3_code": "yuq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kawahib", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Apiak\u00e1", "iso_1_code": null, "iso_3_code": "api", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uru-Pa-In", "iso_1_code": null, "iso_3_code": "urp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uru-Eu-Wau-Wau", "iso_1_code": null, "iso_3_code": "urz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Parintintin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amundava", "iso_1_code": null, "iso_3_code": "adw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "J\u00fama", "iso_1_code": null, "iso_3_code": "jua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karipuna", "iso_1_code": null, "iso_3_code": "kuq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paranaw\u00e1t", "iso_1_code": null, "iso_3_code": "paf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tenharim", "iso_1_code": null, "iso_3_code": "pah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10518", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tukumanf\u00e9d", "iso_1_code": null, "iso_3_code": "tkf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10519", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wiraf\u00e9d", "iso_1_code": null, "iso_3_code": "wir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Morerebi", "iso_1_code": null, "iso_3_code": "xmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayab\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asurini of Xing\u00fa", "iso_1_code": null, "iso_3_code": "asn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10523", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayab\u00ed", "iso_1_code": null, "iso_3_code": "kyz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10524", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tenetehara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Av\u00e1-Canoeiro", "iso_1_code": null, "iso_3_code": "avv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10526", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tapirap\u00e9", "iso_1_code": null, "iso_3_code": "taf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Akwawa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asurini, Tocantins", "iso_1_code": null, "iso_3_code": "asu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10529", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suru\u00ed do Par\u00e1", "iso_1_code": null, "iso_3_code": "mdz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Parakan\u00e3", "iso_1_code": null, "iso_3_code": "pak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tenetehara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Guajaj\u00e1ra", "iso_1_code": null, "iso_3_code": "gub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10533", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Temb\u00e9", "iso_1_code": null, "iso_3_code": "tqb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10534", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tup\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cocama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kukama-Kukamiria", "iso_1_code": null, "iso_3_code": "cod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10537", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Omagua", "iso_1_code": null, "iso_3_code": "omg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10538", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10536", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tup\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Potigu\u00e1ra", "iso_1_code": null, "iso_3_code": "pog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tupinikin", "iso_1_code": null, "iso_3_code": "tpk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tupinamb\u00e1", "iso_1_code": null, "iso_3_code": "tpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10542", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nhengatu", "iso_1_code": null, "iso_3_code": "yrl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10543", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10535", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wayamp\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aur\u00e1", "iso_1_code": null, "iso_3_code": "aux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amanay\u00e9", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anamb\u00e9", "iso_1_code": null, "iso_3_code": "aan", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amanay\u00e9", "iso_1_code": null, "iso_3_code": "ama", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10548", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guaj\u00e1", "iso_1_code": null, "iso_3_code": "gvj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Turiw\u00e1ra", "iso_1_code": null, "iso_3_code": "twt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10550", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaapor", "iso_1_code": null, "iso_3_code": "urb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10551", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ararandew\u00e1ra", "iso_1_code": null, "iso_3_code": "xaj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10546", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wayamp\u00ed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tek\u00f3", "iso_1_code": null, "iso_3_code": "eme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10554", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wayampi", "iso_1_code": null, "iso_3_code": "oym", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10555", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10553", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10544", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10487", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Turkic.json b/data/Turkic.json index a6d775c2a3fcb949a93b9c082351e7fb60815d86..4ae90883c63679c534af28af82944e4de683b306 100644 --- a/data/Turkic.json +++ b/data/Turkic.json @@ -2,1395 +2,1072 @@ "name": "Turkic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tr\")", - "original_lang_name": "turkish", - "original_lang_code": "tur", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Urum", "iso_1_code": null, "iso_3_code": "uum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bolgar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chuvash", "iso_1_code": "cv", "iso_3_code": "chv", + "children": [], "tokenizers": { "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10559", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10558", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"ug\")", - "original_lang_name": "uyghur", - "original_lang_code": "uig", - "scripts": [ - "Latn", - "Cyrl", - "Arab" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"ug\")", - "original_lang_name": "uyghur", - "original_lang_code": "uig", - "scripts": [ - "Latn", - "Cyrl", - "Arab" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Arab": { - "full_object": "StanzaTokenizer(\"ug\")", - "original_lang_name": "uyghur", - "original_lang_code": "uig", - "scripts": [ - "Latn", - "Cyrl", - "Arab" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ainu", "iso_1_code": null, "iso_3_code": "aib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chagatai", "iso_1_code": null, "iso_3_code": "chg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ili Turki", "iso_1_code": null, "iso_3_code": "ili", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uyghur", "iso_1_code": "ug", "iso_3_code": "uig", + "children": [], "tokenizers": { - "Latn": { + "Arab": { "full_object": "StanzaTokenizer(\"ug\")", "original_lang_name": "uyghur", "original_lang_code": "uig", - "scripts": [ - "Latn", - "Cyrl", - "Arab" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Arab", + "class_name": "StanzaTokenizer" }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"ug\")", - "original_lang_name": "uyghur", - "original_lang_code": "uig", - "scripts": [ - "Latn", - "Cyrl", - "Arab" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" }, - "Arab": { - "full_object": "StanzaTokenizer(\"ug\")", - "original_lang_name": "uyghur", - "original_lang_code": "uig", - "scripts": [ - "Latn", - "Cyrl", - "Arab" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10564", + "native_tokenizers": [ + "Arab" + ], "scripts": [ "Latn", "Arab", "Cyrl" - ], - "own_tokenizer": true + ] }, { "name": "Uzbek, Northern", "iso_1_code": "uz", "iso_3_code": "uzn", + "children": [], "tokenizers": { "Latn": { - "full_object": "StanzaTokenizer(\"ug\")", - "original_lang_name": "uyghur", - "original_lang_code": "uig", - "scripts": [ - "Latn", - "Cyrl", - "Arab" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" }, "Cyrl": { - "full_object": "StanzaTokenizer(\"ug\")", - "original_lang_name": "uyghur", - "original_lang_code": "uig", - "scripts": [ - "Latn", - "Cyrl", - "Arab" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10565", + "native_tokenizers": [], "scripts": [ "Latn", "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Uzbek, Southern", "iso_1_code": "uz", "iso_3_code": "uzs", + "children": [], "tokenizers": { "Arab": { "full_object": "StanzaTokenizer(\"ug\")", "original_lang_name": "uyghur", "original_lang_code": "uig", - "scripts": [ - "Latn", - "Cyrl", - "Arab" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Arab", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10566", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": false + ] }, { "name": "Yugur, West", "iso_1_code": null, "iso_3_code": "ybe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10567", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Arab": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "script": "Arab", + "class_name": "StanzaTokenizer" + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10560", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Altai, Southern", "iso_1_code": null, "iso_3_code": "alt", + "children": [], "tokenizers": { "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10569", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Altai, Northern", "iso_1_code": null, "iso_3_code": "atv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10570", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shor", "iso_1_code": null, "iso_3_code": "cjs", + "children": [], "tokenizers": { "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10571", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Dolgan", "iso_1_code": null, "iso_3_code": "dlg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10572", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karagas", "iso_1_code": null, "iso_3_code": "kim", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10573", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khakas", "iso_1_code": null, "iso_3_code": "kjh", + "children": [], "tokenizers": { "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10574", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Yakut", "iso_1_code": null, "iso_3_code": "sah", + "children": [], "tokenizers": { "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10575", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Tuvan", "iso_1_code": null, "iso_3_code": "tyv", + "children": [], "tokenizers": { "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10576", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10568", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"tr\")", - "original_lang_name": "turkish", - "original_lang_code": "tur", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Crimean Tatar", "iso_1_code": null, "iso_3_code": "crh", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tr\")", "original_lang_name": "turkish", "original_lang_code": "tur", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" }, "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10578", + "native_tokenizers": [], "scripts": [ "Latn", "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Khalaj", "iso_1_code": null, "iso_3_code": "klj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kashkay", "iso_1_code": null, "iso_3_code": "qxq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10580", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Salar", "iso_1_code": null, "iso_3_code": "slr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Azerbaijani", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Azerbaijani, South", "iso_1_code": "az", "iso_3_code": "azb", + "children": [], "tokenizers": { "Arab": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "script": "Arab", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10583", + "native_tokenizers": [], "scripts": [ "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Azerbaijani, North", "iso_1_code": "az", "iso_3_code": "azj", + "children": [], "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Latn": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" }, "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10584", + "native_tokenizers": [], "scripts": [ "Latn", "Cyrl" - ], - "own_tokenizer": true + ] } ], - "node_i": "10582", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Turkish", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tr\")", "original_lang_name": "turkish", "original_lang_code": "tur", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" }, "Arab": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "script": "Arab", + "class_name": "StanzaTokenizer" }, "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, + "node_i": "10582", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Turkish", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Balkan Gagauz Turkish", "iso_1_code": null, "iso_3_code": "bgx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10586", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gagauz", "iso_1_code": null, "iso_3_code": "gag", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tr\")", "original_lang_name": "turkish", "original_lang_code": "tur", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" }, "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10587", + "native_tokenizers": [], "scripts": [ "Latn", "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Khorasani Turkish", "iso_1_code": null, "iso_3_code": "kmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Turkish", "iso_1_code": "tr", "iso_3_code": "tur", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tr\")", "original_lang_name": "turkish", "original_lang_code": "tur", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], - "node_i": "10589", - "scripts": [ - "Latn" - ], - "own_tokenizer": true - } - ], - "node_i": "10585", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Turkmenian", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" + "node_i": "10589", + "native_tokenizers": [ + "Latn" ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, + "scripts": [ + "Latn" + ] + } + ], + "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"tr\")", "original_lang_name": "turkish", "original_lang_code": "tur", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" }, "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, + "node_i": "10585", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Turkmenian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Turkmen", "iso_1_code": "tk", "iso_3_code": "tuk", + "children": [], "tokenizers": { - "Arab": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, "Latn": { "full_object": "SpaCyTokenizer(\"tr\")", "original_lang_name": "turkish", "original_lang_code": "tur", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Arab": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "script": "Arab", + "class_name": "StanzaTokenizer" }, "Cyrl": { - "full_object": "SpaCyTokenizer(\"az\")", - "original_lang_name": "azerbaijani", - "original_lang_code": "aze", - "scripts": [ - "Arab", - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10591", + "native_tokenizers": [], "scripts": [ "Latn", "Cyrl", "Arab" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Arab": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "script": "Arab", + "class_name": "StanzaTokenizer" + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "10577", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Western", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Arab": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "script": "Arab", + "class_name": "StanzaTokenizer" + }, "Cyrl": { "full_object": "StanzaTokenizer(\"kk\")", "original_lang_name": "kazakh", "original_lang_code": "kaz", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"tt\")", - "original_lang_name": "tatar", - "original_lang_code": "tat", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, + "node_i": "10577", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Western", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Aralo-Caspian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "StanzaTokenizer(\"kk\")", - "original_lang_name": "kazakh", - "original_lang_code": "kaz", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - }, - "Latn": { - "full_object": "SpaCyTokenizer(\"tt\")", - "original_lang_name": "tatar", - "original_lang_code": "tat", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Karakalpak", "iso_1_code": null, "iso_3_code": "kaa", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"kk\")", "original_lang_name": "kazakh", "original_lang_code": "kaz", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" }, "Latn": { - "full_object": "SpaCyTokenizer(\"tt\")", - "original_lang_name": "tatar", - "original_lang_code": "tat", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10594", + "native_tokenizers": [], "scripts": [ "Cyrl", "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kazakh", "iso_1_code": "kk", "iso_3_code": "kaz", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"kk\")", "original_lang_name": "kazakh", "original_lang_code": "kaz", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10595", - "scripts": [ + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl" + ] }, { "name": "Kyrgyz", "iso_1_code": "ky", "iso_3_code": "kir", + "children": [], "tokenizers": { "Cyrl": { "full_object": "SpaCyTokenizer(\"ky\")", "original_lang_name": "kirghiz", "original_lang_code": "kir", - "scripts": [ - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10596", - "scripts": [ + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl" + ] }, { "name": "Nogai", "iso_1_code": null, "iso_3_code": "nog", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"kk\")", "original_lang_name": "kazakh", "original_lang_code": "kaz", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10597", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Kuman", "iso_1_code": null, "iso_3_code": "qwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "10593", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Ponto-Caspian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"kk\")", "original_lang_name": "kazakh", "original_lang_code": "kaz", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" }, "Latn": { - "full_object": "SpaCyTokenizer(\"tt\")", - "original_lang_name": "tatar", - "original_lang_code": "tat", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "10593", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Ponto-Caspian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Krimchak", "iso_1_code": null, "iso_3_code": "jct", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karaim", "iso_1_code": null, "iso_3_code": "kdr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"tt\")", - "original_lang_name": "tatar", - "original_lang_code": "tat", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10601", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Karachay-Balkar", "iso_1_code": null, "iso_3_code": "krc", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"kk\")", "original_lang_name": "kazakh", "original_lang_code": "kaz", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10602", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Kumyk", "iso_1_code": null, "iso_3_code": "kum", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"kk\")", "original_lang_name": "kazakh", "original_lang_code": "kaz", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10603", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "10599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uralian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tt\")", - "original_lang_name": "tatar", - "original_lang_code": "tat", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "SpaCyTokenizer(\"tt\")", - "original_lang_name": "tatar", - "original_lang_code": "tat", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bashkort", "iso_1_code": "ba", "iso_3_code": "bak", + "children": [], "tokenizers": { "Cyrl": { "full_object": "SpaCyTokenizer(\"tt\")", "original_lang_name": "tatar", "original_lang_code": "tat", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10605", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Chulym", "iso_1_code": null, "iso_3_code": "clw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siberian Tatar", "iso_1_code": null, "iso_3_code": "sty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tatar", "iso_1_code": "tt", "iso_3_code": "tat", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"tt\")", - "original_lang_name": "tatar", - "original_lang_code": "tat", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, "Cyrl": { "full_object": "SpaCyTokenizer(\"tt\")", "original_lang_name": "tatar", "original_lang_code": "tat", - "scripts": [ - "Latn", - "Cyrl" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10608", + "native_tokenizers": [ + "Cyrl" + ], "scripts": [ "Latn", "Cyrl" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Cyrl": { + "full_object": "SpaCyTokenizer(\"tt\")", + "original_lang_name": "tatar", + "original_lang_code": "tat", + "script": "Cyrl", + "class_name": "SpaCyTokenizer" + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "10604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "10592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Arab": { + "full_object": "StanzaTokenizer(\"ug\")", + "original_lang_name": "uyghur", + "original_lang_code": "uig", + "script": "Arab", + "class_name": "StanzaTokenizer" + }, + "Latn": { + "full_object": "SpaCyTokenizer(\"tr\")", + "original_lang_name": "turkish", + "original_lang_code": "tur", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"kk\")", + "original_lang_name": "kazakh", + "original_lang_code": "kaz", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10556", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Tuu.json b/data/Tuu.json index 63cf9fc059c16cb9cebe84a408c7a902d0d45b27..90e587f119dbec61e7eb7b8dabb530ba47f57a45 100644 --- a/data/Tuu.json +++ b/data/Tuu.json @@ -2,102 +2,102 @@ "name": "Tuu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "!Ui", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "\u2021Ungkue", "iso_1_code": null, "iso_3_code": "gku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10611", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seroa", "iso_1_code": null, "iso_3_code": "kqu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "N\u01c1ng", "iso_1_code": null, "iso_3_code": "ngh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10613", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "\u01c0Xam", "iso_1_code": null, "iso_3_code": "xam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10614", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "\u01c1Xegwi", "iso_1_code": null, "iso_3_code": "xeg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10610", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taa-Lower Nossob", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Taa", "iso_1_code": null, "iso_3_code": "nmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10617", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lower Nossob", "iso_1_code": null, "iso_3_code": "nsb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10616", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Unclassified.json b/data/Unclassified.json index cbef3a0646a91ec036732aefb05fc995d7358241..0f646144a0ed18dfdff786398c0b0f9fd6497834 100644 --- a/data/Unclassified.json +++ b/data/Unclassified.json @@ -2,560 +2,560 @@ "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alsea", "iso_1_code": null, "iso_3_code": "aes", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aguano", "iso_1_code": null, "iso_3_code": "aga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awishira", "iso_1_code": null, "iso_3_code": "ash", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10622", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Agavotaguerra", "iso_1_code": null, "iso_3_code": "avo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10623", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ar\u00e1ra, Mato Grosso", "iso_1_code": null, "iso_3_code": "axg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10624", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bung", "iso_1_code": null, "iso_3_code": "bqd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Beothuk", "iso_1_code": null, "iso_3_code": "bue", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Carabayo", "iso_1_code": null, "iso_3_code": "cby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10627", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Doso", "iso_1_code": null, "iso_3_code": "dol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gail", "iso_1_code": null, "iso_3_code": "gic", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10629", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Himarim\u00e3", "iso_1_code": null, "iso_3_code": "hir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haitian Vodoun Culture Language", "iso_1_code": null, "iso_3_code": "hvc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kara", "iso_1_code": null, "iso_3_code": "kah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lufu", "iso_1_code": null, "iso_3_code": "ldq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10633", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lepki", "iso_1_code": null, "iso_3_code": "lpe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kasabe", "iso_1_code": null, "iso_3_code": "luw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Majhwar", "iso_1_code": null, "iso_3_code": "mmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangue", "iso_1_code": null, "iso_3_code": "mom", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10637", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Molof", "iso_1_code": null, "iso_3_code": "msl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10638", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Namla", "iso_1_code": null, "iso_3_code": "naa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pankarar\u00e9", "iso_1_code": null, "iso_3_code": "pax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pijao", "iso_1_code": null, "iso_3_code": "pij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Polari", "iso_1_code": null, "iso_3_code": "pld", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10642", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mercheros", "iso_1_code": null, "iso_3_code": "quq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rer Bare", "iso_1_code": null, "iso_3_code": "rer", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murkim", "iso_1_code": null, "iso_3_code": "rmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shabo", "iso_1_code": null, "iso_3_code": "sbf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10646", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kimki", "iso_1_code": null, "iso_3_code": "sbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shanenawa", "iso_1_code": null, "iso_3_code": "swo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tingui-Boto", "iso_1_code": null, "iso_3_code": "tgv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Truk\u00e1", "iso_1_code": null, "iso_3_code": "tka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10650", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Takelma", "iso_1_code": null, "iso_3_code": "tkm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tofanma", "iso_1_code": null, "iso_3_code": "tlg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10652", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Trememb\u00e9", "iso_1_code": null, "iso_3_code": "tme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Traveller Scottish", "iso_1_code": null, "iso_3_code": "trl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lule", "iso_1_code": null, "iso_3_code": "ule", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Usku", "iso_1_code": null, "iso_3_code": "ulf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10656", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kujarge", "iso_1_code": null, "iso_3_code": "vkj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wakon\u00e1", "iso_1_code": null, "iso_3_code": "waf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Weyto", "iso_1_code": null, "iso_3_code": "woy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wasu", "iso_1_code": null, "iso_3_code": "wsu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waxianghua", "iso_1_code": null, "iso_3_code": "wxa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Adai", "iso_1_code": null, "iso_3_code": "xad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaimb\u00e9", "iso_1_code": null, "iso_3_code": "xai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kenaboi", "iso_1_code": null, "iso_3_code": "xbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kambiw\u00e1", "iso_1_code": null, "iso_3_code": "xbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kembra", "iso_1_code": null, "iso_3_code": "xkw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kapinaw\u00e1", "iso_1_code": null, "iso_3_code": "xpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aranama-Tamique", "iso_1_code": null, "iso_3_code": "xrt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Solano", "iso_1_code": null, "iso_3_code": "xso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yitha Yitha", "iso_1_code": null, "iso_3_code": "xth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tambora", "iso_1_code": null, "iso_3_code": "xxt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pum\u00e9", "iso_1_code": null, "iso_3_code": "yae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yeni", "iso_1_code": null, "iso_3_code": "yei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yetfa", "iso_1_code": null, "iso_3_code": "yet", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Uralic.json b/data/Uralic.json index 84fb9cfcd2942f841a27e6469d5615153066bb92..8bd4715d108ef2607dba0c2d88e20c12b256ad38 100644 --- a/data/Uralic.json +++ b/data/Uralic.json @@ -2,1160 +2,1030 @@ "name": "Uralic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"hu\")", - "original_lang_name": "hungarian", - "original_lang_code": "hun", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - }, - "Cyrl": { - "full_object": "StanzaTokenizer(\"myv\")", - "original_lang_name": "erzya", - "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Hungarian", "iso_1_code": "hu", "iso_3_code": "hun", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"hu\")", "original_lang_name": "hungarian", "original_lang_code": "hun", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10676", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Khanty", "iso_1_code": null, "iso_3_code": "kca", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10677", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Mansi", "iso_1_code": null, "iso_3_code": "mns", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10678", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Finnic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"fi\")", - "original_lang_name": "finnish", - "original_lang_code": "fin", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Estonian, Standard", "iso_1_code": "et", "iso_3_code": "ekk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"et\")", - "original_lang_name": "estonian", - "original_lang_code": "est", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10680", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Finnish", "iso_1_code": "fi", "iso_3_code": "fin", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fi\")", "original_lang_name": "finnish", "original_lang_code": "fin", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10681", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Me\u00e4nkieli", "iso_1_code": null, "iso_3_code": "fit", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fi\")", "original_lang_name": "finnish", "original_lang_code": "fin", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10682", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Finnish, Kven", "iso_1_code": null, "iso_3_code": "fkv", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fi\")", "original_lang_name": "finnish", "original_lang_code": "fin", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10683", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ingrian", "iso_1_code": null, "iso_3_code": "izh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10684", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karelian", "iso_1_code": null, "iso_3_code": "krl", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fi\")", "original_lang_name": "finnish", "original_lang_code": "fin", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10685", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Liv", "iso_1_code": null, "iso_3_code": "liv", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fi\")", "original_lang_name": "finnish", "original_lang_code": "fin", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10686", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ludian", "iso_1_code": null, "iso_3_code": "lud", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fi\")", "original_lang_name": "finnish", "original_lang_code": "fin", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10687", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Livvi-Karelian", "iso_1_code": null, "iso_3_code": "olo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fi\")", "original_lang_name": "finnish", "original_lang_code": "fin", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10688", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Veps", "iso_1_code": null, "iso_3_code": "vep", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fi\")", "original_lang_name": "finnish", "original_lang_code": "fin", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10689", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Vod", "iso_1_code": null, "iso_3_code": "vot", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"fi\")", "original_lang_name": "finnish", "original_lang_code": "fin", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10690", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "V\u00f5ro", "iso_1_code": "et", "iso_3_code": "vro", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"et\")", - "original_lang_name": "estonian", - "original_lang_code": "est", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "10691", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Krevinian", "iso_1_code": null, "iso_3_code": "zkv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"fi\")", + "original_lang_name": "finnish", + "original_lang_code": "fin", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "10679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "StanzaTokenizer(\"myv\")", - "original_lang_name": "erzya", - "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mari, Meadow", "iso_1_code": null, "iso_3_code": "mhr", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10694", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Mari, Hill", "iso_1_code": null, "iso_3_code": "mrj", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10695", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], - "node_i": "10693", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Mordvin", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, + "node_i": "10693", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Mordvin", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Moksha", "iso_1_code": null, "iso_3_code": "mdf", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10697", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Erzya", "iso_1_code": null, "iso_3_code": "myv", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10698", - "scripts": [ + "native_tokenizers": [ "Cyrl" ], - "own_tokenizer": true + "scripts": [ + "Cyrl" + ] } ], - "node_i": "10696", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Permian", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, + "node_i": "10696", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Permian", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Udmurt", "iso_1_code": null, "iso_3_code": "udm", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10700", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Komi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "StanzaTokenizer(\"myv\")", - "original_lang_name": "erzya", - "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Komi-Permyak", "iso_1_code": "kv", "iso_3_code": "koi", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10702", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Komi-Zyrian", "iso_1_code": "kv", "iso_3_code": "kpv", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10703", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sami", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"se\")", - "original_lang_name": "northern_sami", - "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"se\")", - "original_lang_name": "northern_sami", - "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Saami, Akkala", "iso_1_code": null, "iso_3_code": "sia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saami, Kildin", "iso_1_code": null, "iso_3_code": "sjd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saami, Kemi", "iso_1_code": null, "iso_3_code": "sjk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10708", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saami, Ter", "iso_1_code": null, "iso_3_code": "sjt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saami, Inari", "iso_1_code": null, "iso_3_code": "smn", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"se\")", "original_lang_name": "northern_sami", "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10710", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Saami, Skolt", "iso_1_code": null, "iso_3_code": "sms", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"se\")", "original_lang_name": "northern_sami", "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10711", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "10705", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"se\")", "original_lang_name": "northern_sami", "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "10705", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Saami, Ume", "iso_1_code": null, "iso_3_code": "sju", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"se\")", "original_lang_name": "northern_sami", "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10713", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "10712", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Western", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"se\")", "original_lang_name": "northern_sami", "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "10712", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Western", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "StanzaTokenizer(\"se\")", - "original_lang_name": "northern_sami", - "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Saami, Pite", "iso_1_code": null, "iso_3_code": "sje", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saami, North", "iso_1_code": "se", "iso_3_code": "sme", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"se\")", "original_lang_name": "northern_sami", "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10717", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Saami, Lule", "iso_1_code": null, "iso_3_code": "smj", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"se\")", "original_lang_name": "northern_sami", "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10718", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "10715", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southern", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"se\")", "original_lang_name": "northern_sami", "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, + "node_i": "10715", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southern", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Saami, South", "iso_1_code": null, "iso_3_code": "sma", + "children": [], "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"se\")", "original_lang_name": "northern_sami", "original_lang_code": "sme", - "scripts": [ - "Latn" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10720", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10719", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10714", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "StanzaTokenizer(\"se\")", + "original_lang_name": "northern_sami", + "original_lang_code": "sme", + "script": "Latn", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samoyed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "StanzaTokenizer(\"myv\")", - "original_lang_name": "erzya", - "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mator", "iso_1_code": null, "iso_3_code": "mtm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10722", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Samoyed", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Cyrl": { - "full_object": "StanzaTokenizer(\"myv\")", - "original_lang_name": "erzya", - "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nganasan", "iso_1_code": null, "iso_3_code": "nio", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10724", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Nenets", "iso_1_code": null, "iso_3_code": "yrk", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10725", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Enets", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Enets, Forest", "iso_1_code": null, "iso_3_code": "enf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Enets, Tundra", "iso_1_code": null, "iso_3_code": "enh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10728", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yurats", "iso_1_code": null, "iso_3_code": "rts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10726", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "10723", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Southern Samoyed", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, + "node_i": "10723", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Southern Samoyed", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Selkup", "iso_1_code": null, "iso_3_code": "sel", + "children": [], "tokenizers": { "Cyrl": { "full_object": "StanzaTokenizer(\"myv\")", "original_lang_name": "erzya", "original_lang_code": "myv", - "scripts": [ - "Cyrl" - ], - "class_name": "StanzaTokenizer", - "macrolanguage": false + "script": "Cyrl", + "class_name": "StanzaTokenizer" } }, - "children": [], "node_i": "10731", + "native_tokenizers": [], "scripts": [ "Cyrl" - ], - "own_tokenizer": false + ] }, { "name": "Kamas", "iso_1_code": null, "iso_3_code": "xas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10721", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"hu\")", + "original_lang_name": "hungarian", + "original_lang_code": "hun", + "script": "Latn", + "class_name": "SpaCyTokenizer" + }, + "Cyrl": { + "full_object": "StanzaTokenizer(\"myv\")", + "original_lang_name": "erzya", + "original_lang_code": "myv", + "script": "Cyrl", + "class_name": "StanzaTokenizer" + } + }, "node_i": "10675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Uto-Aztecan.json b/data/Uto-Aztecan.json index 6b69db2375fcbb9e9e30d1a82af5ab02d3982d21..4be899c9ee0c53ad713751ccc12b256f975a318b 100644 --- a/data/Uto-Aztecan.json +++ b/data/Uto-Aztecan.json @@ -2,928 +2,928 @@ "name": "Uto-Aztecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern Uto-Aztecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hopi", "iso_1_code": null, "iso_3_code": "hop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10735", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "T\u00fcbatulabal", "iso_1_code": null, "iso_3_code": "tub", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10736", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Numic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Comanche", "iso_1_code": null, "iso_3_code": "com", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Timbisha", "iso_1_code": null, "iso_3_code": "par", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shoshoni", "iso_1_code": null, "iso_3_code": "shh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ute-Southern Paiute", "iso_1_code": null, "iso_3_code": "ute", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10743", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kawaiisu", "iso_1_code": null, "iso_3_code": "xaw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10744", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mono", "iso_1_code": null, "iso_3_code": "mnr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10746", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paiute, Northern", "iso_1_code": null, "iso_3_code": "pao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10747", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10737", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Takic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Luise\u00f1o", "iso_1_code": null, "iso_3_code": "lui", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Serrano", "iso_1_code": null, "iso_3_code": "ser", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gabrielino-Fernande\u00f1o", "iso_1_code": null, "iso_3_code": "xgf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cahuilla", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cahuilla", "iso_1_code": null, "iso_3_code": "chl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cupe\u00f1o", "iso_1_code": null, "iso_3_code": "cup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Uto-Aztecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Corachol-Aztecan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pochutec", "iso_1_code": null, "iso_3_code": "xpo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cora-Huichol", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Huichol", "iso_1_code": null, "iso_3_code": "hch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10759", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cora", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cora, Santa Teresa", "iso_1_code": null, "iso_3_code": "cok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10761", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Cora, El Nayar", "iso_1_code": null, "iso_3_code": "crn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10762", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10760", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10758", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Core Nahua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nahuat", "iso_1_code": null, "iso_3_code": "ppl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nahuatl, Eastern Durango", "iso_1_code": null, "iso_3_code": "azd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10766", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Western Durango", "iso_1_code": null, "iso_3_code": "azn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10767", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Highland Puebla", "iso_1_code": null, "iso_3_code": "azz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10768", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Coatepec", "iso_1_code": null, "iso_3_code": "naz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Central Huasteca", "iso_1_code": null, "iso_3_code": "nch", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10770", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Northern Puebla", "iso_1_code": null, "iso_3_code": "ncj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10771", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Michoac\u00e1n", "iso_1_code": null, "iso_3_code": "ncl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10772", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Central Puebla", "iso_1_code": null, "iso_3_code": "ncx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10773", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Guerrero", "iso_1_code": null, "iso_3_code": "ngu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10774", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Tabasco", "iso_1_code": null, "iso_3_code": "nhc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Eastern Huasteca", "iso_1_code": null, "iso_3_code": "nhe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10776", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Tetelcingo", "iso_1_code": null, "iso_3_code": "nhg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10777", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Zacatl\u00e1n-Ahuacatl\u00e1n-Tepetzintla", "iso_1_code": null, "iso_3_code": "nhi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10778", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Isthmus-Cosoleacaque", "iso_1_code": null, "iso_3_code": "nhk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10779", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Morelos", "iso_1_code": null, "iso_3_code": "nhm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10780", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Central", "iso_1_code": null, "iso_3_code": "nhn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Isthmus-Pajapan", "iso_1_code": null, "iso_3_code": "nhp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Huaxcaleca", "iso_1_code": null, "iso_3_code": "nhq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Ometepec", "iso_1_code": null, "iso_3_code": "nht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10784", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Temascaltepec", "iso_1_code": null, "iso_3_code": "nhv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Western Huasteca", "iso_1_code": null, "iso_3_code": "nhw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10786", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Isthmus-Mecayapan", "iso_1_code": null, "iso_3_code": "nhx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10787", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Northern Oaxaca", "iso_1_code": null, "iso_3_code": "nhy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10788", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Santa Mar\u00eda la Alta", "iso_1_code": null, "iso_3_code": "nhz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Orizaba", "iso_1_code": null, "iso_3_code": "nlv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahuatl, Southeastern Puebla", "iso_1_code": null, "iso_3_code": "npl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10791", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Sierra Negra", "iso_1_code": null, "iso_3_code": "nsu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10792", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nahuatl, Tlamacazapa", "iso_1_code": null, "iso_3_code": "nuz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10793", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10765", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pimic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tohono O\u2019odham", "iso_1_code": null, "iso_3_code": "ood", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10795", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pima Bajo", "iso_1_code": null, "iso_3_code": "pia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10796", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tepecano", "iso_1_code": null, "iso_3_code": "tep", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10797", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tepehuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tepehuan, Northern", "iso_1_code": null, "iso_3_code": "ntp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10799", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tepehuan, Southeastern", "iso_1_code": null, "iso_3_code": "stp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10800", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tepehuan, Southwestern", "iso_1_code": null, "iso_3_code": "tla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10794", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taracahitic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tubar", "iso_1_code": null, "iso_3_code": "tbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cahitan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mayo", "iso_1_code": null, "iso_3_code": "mfy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10805", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yaqui", "iso_1_code": null, "iso_3_code": "yaq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10806", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10804", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Opatan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eudeve", "iso_1_code": null, "iso_3_code": "eud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10808", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Opata", "iso_1_code": null, "iso_3_code": "opt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10809", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10807", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarahumaran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Huarij\u00edo", "iso_1_code": null, "iso_3_code": "var", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10811", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tarahumara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tarahumara, Western", "iso_1_code": null, "iso_3_code": "tac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10813", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tarahumara, Central", "iso_1_code": null, "iso_3_code": "tar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10814", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tarahumara, Southeastern", "iso_1_code": null, "iso_3_code": "tcu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarahumara, Northern", "iso_1_code": null, "iso_3_code": "thh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10816", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarahumara, Southwestern", "iso_1_code": null, "iso_3_code": "twr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10817", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10755", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Wakashan.json b/data/Wakashan.json index af4495199987dbec9527b7420996caa6296aa158..e1d3bda8d1c656430d9dad73db77d2d0765e0339 100644 --- a/data/Wakashan.json +++ b/data/Wakashan.json @@ -2,103 +2,103 @@ "name": "Wakashan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Northern Wakashan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Haisla", "iso_1_code": null, "iso_3_code": "has", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwakiutlan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Heiltsuk", "iso_1_code": null, "iso_3_code": "hei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10822", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwakwala", "iso_1_code": null, "iso_3_code": "kwk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Wakashan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ditidaht", "iso_1_code": null, "iso_3_code": "dtd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10825", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makah", "iso_1_code": null, "iso_3_code": "myh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuu-chah-nulth", "iso_1_code": null, "iso_3_code": "nuk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/West Papuan.json b/data/West Papuan.json index 5b179336d491a62edbba371e08d2ab147be6aa82..0907d8f75c42c3b1aceafdf463fe9f11e24716d6 100644 --- a/data/West Papuan.json +++ b/data/West Papuan.json @@ -2,327 +2,327 @@ "name": "West Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "North Halmahera", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Galela-Loloda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Galela", "iso_1_code": null, "iso_3_code": "gbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10831", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Laba", "iso_1_code": null, "iso_3_code": "lau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10832", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loloda", "iso_1_code": null, "iso_3_code": "loa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Modole", "iso_1_code": null, "iso_3_code": "mqo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pagu", "iso_1_code": null, "iso_3_code": "pgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tabaru", "iso_1_code": null, "iso_3_code": "tby", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10836", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tobelo", "iso_1_code": null, "iso_3_code": "tlb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10837", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tugutil", "iso_1_code": null, "iso_3_code": "tuj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10838", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sahu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gamkonora", "iso_1_code": null, "iso_3_code": "gak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ibu", "iso_1_code": null, "iso_3_code": "ibu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10841", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kao", "iso_1_code": null, "iso_3_code": "kax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sahu", "iso_1_code": null, "iso_3_code": "saj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10843", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Waioli", "iso_1_code": null, "iso_3_code": "wli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10844", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ternate-Tidore", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ternate", "iso_1_code": null, "iso_3_code": "tft", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tidore", "iso_1_code": null, "iso_3_code": "tvo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Makian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Makian, West", "iso_1_code": null, "iso_3_code": "mqs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Bird\u2019s Head", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tehit", "iso_1_code": null, "iso_3_code": "kps", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalabra", "iso_1_code": null, "iso_3_code": "kzz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10852", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moraid", "iso_1_code": null, "iso_3_code": "msg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10853", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moi Kelim", "iso_1_code": null, "iso_3_code": "mxn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moi Lemas", "iso_1_code": null, "iso_3_code": "sbg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10855", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yapen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Saweru", "iso_1_code": null, "iso_3_code": "swr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10857", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yawa", "iso_1_code": null, "iso_3_code": "yva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10858", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Wintuan.json b/data/Wintuan.json index 7a27d1c24580fa9d7203d007a5748cbbd5e41a1b..5bcfe5b8d2c5dda343325d821b8393c83750ecb2 100644 --- a/data/Wintuan.json +++ b/data/Wintuan.json @@ -2,40 +2,40 @@ "name": "Wintuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nomlaki", "iso_1_code": null, "iso_3_code": "nol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10860", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Patwin", "iso_1_code": null, "iso_3_code": "pwi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10861", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wintu", "iso_1_code": null, "iso_3_code": "wnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10859", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Witotoan.json b/data/Witotoan.json index 4f4bbad69f622eda65ab62f5bd71cf95a8ff9ac3..1cadf8b55db5457e2e6846ec5f8ed3562889117a 100644 --- a/data/Witotoan.json +++ b/data/Witotoan.json @@ -2,132 +2,132 @@ "name": "Witotoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nonuya", "iso_1_code": null, "iso_3_code": "noj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Proto-Bora-Muinane", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Muinane", "iso_1_code": null, "iso_3_code": "bmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10866", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bora", "iso_1_code": null, "iso_3_code": "boa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10867", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10865", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Proto-Huitoto-Ocaina", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ocaina", "iso_1_code": null, "iso_3_code": "oca", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Early Huitoto", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Witoto, Nipode", "iso_1_code": null, "iso_3_code": "hux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10871", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Proto-Minica-Murui", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Witoto, Minika", "iso_1_code": null, "iso_3_code": "hto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10873", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Witoto, Murui", "iso_1_code": null, "iso_3_code": "huu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10874", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10872", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Yaguan.json b/data/Yaguan.json index b9b114cdd19a041ab077cba42724e8e033264d2f..7b7154803a9254f6c54099a863bb8082f161e980 100644 --- a/data/Yaguan.json +++ b/data/Yaguan.json @@ -2,32 +2,32 @@ "name": "Yaguan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yagua", "iso_1_code": null, "iso_3_code": "yad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10876", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yameo", "iso_1_code": null, "iso_3_code": "yme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10877", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10875", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Yanomaman.json b/data/Yanomaman.json index 2b042f7186841fd485795277a3185d807f28bd6c..a7c9d68592f8073aea510e2cfe5c73c840f33263 100644 --- a/data/Yanomaman.json +++ b/data/Yanomaman.json @@ -2,64 +2,64 @@ "name": "Yanomaman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yanomam\u00f6", "iso_1_code": null, "iso_3_code": "guu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10879", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ninam", "iso_1_code": null, "iso_3_code": "shb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10880", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yanom\u00e1mi", "iso_1_code": null, "iso_3_code": "wca", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10881", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanum\u00e1", "iso_1_code": null, "iso_3_code": "xsu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10882", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yaroam\u00eb", "iso_1_code": null, "iso_3_code": "yro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10883", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10878", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Yele-West New Britain.json b/data/Yele-West New Britain.json index c97b947b0cfc06ea06cb99bd0b77eca300ae7331..21dc8cdc7faeb56ad11428f0e337f891481176fb 100644 --- a/data/Yele-West New Britain.json +++ b/data/Yele-West New Britain.json @@ -2,66 +2,66 @@ "name": "Yele-West New Britain", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "West New Britain", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anem", "iso_1_code": null, "iso_3_code": "anz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10886", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pele-Ata", "iso_1_code": null, "iso_3_code": "ata", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10887", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10885", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yele", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Y\u00e9l\u00ee Dnye", "iso_1_code": null, "iso_3_code": "yle", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10889", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10888", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10884", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Yeniseian.json b/data/Yeniseian.json index c76e57d1bf55e27e17509775b7e26bb76d4c0fa0..f3c5e191d1122e17a7a5c500b952c27acda58aa5 100644 --- a/data/Yeniseian.json +++ b/data/Yeniseian.json @@ -2,60 +2,60 @@ "name": "Yeniseian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ket", "iso_1_code": null, "iso_3_code": "ket", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10891", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pumpokol", "iso_1_code": null, "iso_3_code": "xpm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10892", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arin", "iso_1_code": null, "iso_3_code": "xrn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10893", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yug", "iso_1_code": null, "iso_3_code": "yug", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10894", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kott", "iso_1_code": null, "iso_3_code": "zko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10895", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10890", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Yokutsan.json b/data/Yokutsan.json index 382c5b1c28969c779c1165c15dce9af31ac7abf0..9f6da8f723efecd96a2dc11dd2111c5d44483d1d 100644 --- a/data/Yokutsan.json +++ b/data/Yokutsan.json @@ -2,20 +2,20 @@ "name": "Yokutsan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yokuts", "iso_1_code": null, "iso_3_code": "yok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10897", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10896", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Yuat.json b/data/Yuat.json index e9e3877bd3707875a757ae3559713a271b0c7655..c95e6e4d2766319b9c00b2e80089dd38d5f0f19e 100644 --- a/data/Yuat.json +++ b/data/Yuat.json @@ -2,62 +2,62 @@ "name": "Yuat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bun", "iso_1_code": null, "iso_3_code": "buv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Biwat", "iso_1_code": null, "iso_3_code": "bwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Changriwa", "iso_1_code": null, "iso_3_code": "cga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10901", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kyenele", "iso_1_code": null, "iso_3_code": "kql", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10902", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mekmek", "iso_1_code": null, "iso_3_code": "mvk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10903", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Yukaghir.json b/data/Yukaghir.json index e0250e64b6c1b7e3f58f8068f6ed1f9d5e390712..a5667a4924fe8d083116b7b6da3162cc65b8d4d7 100644 --- a/data/Yukaghir.json +++ b/data/Yukaghir.json @@ -2,50 +2,50 @@ "name": "Yukaghir", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Omok", "iso_1_code": null, "iso_3_code": "omk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10905", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chuvantsy", "iso_1_code": null, "iso_3_code": "xcv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yukaghir, Northern", "iso_1_code": null, "iso_3_code": "ykg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yukaghir, Southern", "iso_1_code": null, "iso_3_code": "yux", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10908", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10904", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Yukian.json b/data/Yukian.json index 4cb4ee183891c2787d321bf3958f9569dd482dcf..1a1eda57e90d3c096bec58dd47adb33ae9aaec59 100644 --- a/data/Yukian.json +++ b/data/Yukian.json @@ -2,41 +2,41 @@ "name": "Yukian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wappo", "iso_1_code": null, "iso_3_code": "wao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10910", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Core Yukian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yuki", "iso_1_code": null, "iso_3_code": "yuk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10912", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10911", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10909", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Zamucoan.json b/data/Zamucoan.json index 28e2da0ff3eea00f9d7c590fff78b675af6fb471..a0171f602a791ea3d51f9cb8d1920b505fb52594 100644 --- a/data/Zamucoan.json +++ b/data/Zamucoan.json @@ -2,34 +2,34 @@ "name": "Zamucoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ayoreo", "iso_1_code": null, "iso_3_code": "ayo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10914", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chamacoco", "iso_1_code": null, "iso_3_code": "ceg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10915", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/data/Zaparoan.json b/data/Zaparoan.json index e7a5fac1c71d59304a60536756b77227a57c90a7..7d9cfa74bff69b207ad2697052100b5588de55bd 100644 --- a/data/Zaparoan.json +++ b/data/Zaparoan.json @@ -2,95 +2,95 @@ "name": "Zaparoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Iquito-Cahuarano", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cahuarano", "iso_1_code": null, "iso_3_code": "cah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iquitu", "iso_1_code": null, "iso_3_code": "iqu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Z\u00e1paro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Z\u00e1paro", "iso_1_code": null, "iso_3_code": "zro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabela-Andoa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Andoa", "iso_1_code": null, "iso_3_code": "anb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arabela", "iso_1_code": null, "iso_3_code": "arl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "10924", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "10922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "10916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file diff --git a/script.js b/script.js index 24bbbc8427d5e816ee358377400d8413c3f6c910..87bd960908c1811bb0f8e0bb059dc84ccd42188c 100644 --- a/script.js +++ b/script.js @@ -220,7 +220,7 @@ document.addEventListener("DOMContentLoaded", function() { const tokenizerName = getColorTokenizer(d.data); return color(tokenizerName); }); - node.filter(d => d.data.iso_3_code && !d.data.own_tokenizer) // Select leaf nodes + node.filter(d => d.data.iso_3_code && d.data.native_tokenizers.length === 0) // Select leaf nodes .append('rect') .attr('width', 10) .attr('height', 10) @@ -230,7 +230,7 @@ document.addEventListener("DOMContentLoaded", function() { const tokenizerName = getColorTokenizer(d.data); return color(tokenizerName); }); - node.filter(d => d.data.own_tokenizer) // Select leaf nodes with "own" assignment + node.filter(d => d.data.native_tokenizers.length !== 0) // Select leaf nodes with "own" assignment .append('path') .attr('d', d3.symbol().type(d3.symbolTriangle).size(100)) // Adjust size as needed .attr('fill', d => { @@ -271,7 +271,8 @@ document.addEventListener("DOMContentLoaded", function() { hoverBox.style.display = "block"; hoverBox.style.left = (event.pageX) + "px"; hoverBox.style.top = (event.pageY) + "px"; - const tokenizersList = Object.keys(d.data.tokenizers).map((script) => `