diff --git "a/data/Austronesian.json" "b/data/Austronesian.json" --- "a/data/Austronesian.json" +++ "b/data/Austronesian.json" @@ -2,35727 +2,24499 @@ "name": "Austronesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Atayalic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Atayal", "iso_1_code": null, "iso_3_code": "tay", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1438", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sediq", "iso_1_code": null, "iso_3_code": "trv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1439", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bunun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bunun", "iso_1_code": null, "iso_3_code": "bnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1441", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Formosan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Amis", "iso_1_code": null, "iso_3_code": "ami", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1444", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sakizaya", "iso_1_code": null, "iso_3_code": "szy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1445", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Basay", "iso_1_code": null, "iso_3_code": "byq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kavalan", "iso_1_code": null, "iso_3_code": "ckv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1448", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1446", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Siraya", "iso_1_code": null, "iso_3_code": "fos", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1450", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taivoan", "iso_1_code": null, "iso_3_code": "tvx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1451", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malayo-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Agta, Villa Viciosa", "iso_1_code": null, "iso_3_code": "dyg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1453", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bali-Sasak-Sumbawa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bali", "iso_1_code": null, "iso_3_code": "ban", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1455", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sasak-Sumbawa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sasak", "iso_1_code": null, "iso_3_code": "sas", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1457", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sumbawa", "iso_1_code": null, "iso_3_code": "smw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bashiic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ivatan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ibatan", "iso_1_code": null, "iso_3_code": "ivb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1461", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ivatan", "iso_1_code": null, "iso_3_code": "ivv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1462", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yami", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yami", "iso_1_code": null, "iso_3_code": "tao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bilic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bagobo-Klata", "iso_1_code": null, "iso_3_code": "bgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teduray", "iso_1_code": null, "iso_3_code": "tiy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1467", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Blaan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Blaan, Koronadal", "iso_1_code": null, "iso_3_code": "bpr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1469", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Blaan, Sarangani", "iso_1_code": null, "iso_3_code": "bps", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1470", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tboli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tboli", "iso_1_code": null, "iso_3_code": "tbl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1472", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1471", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Celebic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Saluan-Banggai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Banggai", "iso_1_code": null, "iso_3_code": "bgz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1477", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Balantak", "iso_1_code": null, "iso_3_code": "blz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1478", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Andio", "iso_1_code": null, "iso_3_code": "bzb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saluanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bobongko", "iso_1_code": null, "iso_3_code": "bgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saluan", "iso_1_code": null, "iso_3_code": "loe", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1483", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Batui", "iso_1_code": null, "iso_3_code": "zbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bungku-Tolaki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "East Coast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bungku", "iso_1_code": null, "iso_3_code": "bkz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bahonsuai", "iso_1_code": null, "iso_3_code": "bsu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1490", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wawonii", "iso_1_code": null, "iso_3_code": "wow", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mori Bawah", "iso_1_code": null, "iso_3_code": "xmz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulisusu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Taloki", "iso_1_code": null, "iso_3_code": "tlk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1494", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulisusu", "iso_1_code": null, "iso_3_code": "vkl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1495", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koroni", "iso_1_code": null, "iso_3_code": "xkq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1496", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Moronene", "iso_1_code": null, "iso_3_code": "mqn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1498", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1497", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1487", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Interior", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mori Atas", "iso_1_code": null, "iso_3_code": "mzq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1501", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Padoe", "iso_1_code": null, "iso_3_code": "pdo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tomadino", "iso_1_code": null, "iso_3_code": "tdi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1500", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Coast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tolaki", "iso_1_code": null, "iso_3_code": "lbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rahambuu", "iso_1_code": null, "iso_3_code": "raz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kodeoha", "iso_1_code": null, "iso_3_code": "vko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waru", "iso_1_code": null, "iso_3_code": "wru", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1504", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1499", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1486", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muna-Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nuclear Muna-Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "East Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lasalimu", "iso_1_code": null, "iso_3_code": "llm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kumbewaha", "iso_1_code": null, "iso_3_code": "xks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Buton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cia-Cia", "iso_1_code": null, "iso_3_code": "cia", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Busoa", "iso_1_code": null, "iso_3_code": "bup", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1518", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kaimbulawa", "iso_1_code": null, "iso_3_code": "zka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Liabuku", "iso_1_code": null, "iso_3_code": "lix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muna", "iso_1_code": null, "iso_3_code": "mnb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1523", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Pancana", "iso_1_code": null, "iso_3_code": "pnp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kioko", "iso_1_code": null, "iso_3_code": "ues", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1525", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1519", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tukangbesi-Bonerate", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tukang Besi South", "iso_1_code": null, "iso_3_code": "bhq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bonerate", "iso_1_code": null, "iso_3_code": "bna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tukang Besi North", "iso_1_code": null, "iso_3_code": "khc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1529", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1526", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1474", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaili-Pamona", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kaili", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Baras", "iso_1_code": null, "iso_3_code": "brs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tado", "iso_1_code": null, "iso_3_code": "klw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1534", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaili, Da\u2019a", "iso_1_code": null, "iso_3_code": "kzf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1535", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kaili, Ledo", "iso_1_code": null, "iso_3_code": "lew", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1536", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Moma", "iso_1_code": null, "iso_3_code": "myl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1537", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Topoiyo", "iso_1_code": null, "iso_3_code": "toy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1538", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sedoa", "iso_1_code": null, "iso_3_code": "tvw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaili, Unde", "iso_1_code": null, "iso_3_code": "unz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pamona", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Pamona", "iso_1_code": null, "iso_3_code": "pmf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1542", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tombelala", "iso_1_code": null, "iso_3_code": "ttp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rampi", "iso_1_code": null, "iso_3_code": "lje", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uma", "iso_1_code": null, "iso_3_code": "ppk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1546", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sarudu", "iso_1_code": null, "iso_3_code": "sdu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Badaic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Behoa", "iso_1_code": null, "iso_3_code": "bep", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bada", "iso_1_code": null, "iso_3_code": "bhz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1550", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Napu", "iso_1_code": null, "iso_3_code": "npy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1551", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1548", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1544", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tomini-Tolitoli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tolitoli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Boano", "iso_1_code": null, "iso_3_code": "bzl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Totoli", "iso_1_code": null, "iso_3_code": "txe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1553", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tomini", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dondo", "iso_1_code": null, "iso_3_code": "dok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1558", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lauje", "iso_1_code": null, "iso_3_code": "law", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1559", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tomini", "iso_1_code": null, "iso_3_code": "txm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1560", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Balaesang", "iso_1_code": null, "iso_3_code": "bls", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dampelas", "iso_1_code": null, "iso_3_code": "dms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taje", "iso_1_code": null, "iso_3_code": "pee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1564", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tajio", "iso_1_code": null, "iso_3_code": "tdj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1565", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pendau", "iso_1_code": null, "iso_3_code": "ums", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1566", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1556", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1552", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wotu-Wolio", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wotu", "iso_1_code": null, "iso_3_code": "wtw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1568", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kalao", "iso_1_code": null, "iso_3_code": "kly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1570", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laiyolo", "iso_1_code": null, "iso_3_code": "lji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1571", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1569", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wolio-Kamaru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamaru", "iso_1_code": null, "iso_3_code": "kgx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1573", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wolio", "iso_1_code": null, "iso_3_code": "wlo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1574", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1572", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1567", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Luzon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Pampangan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kapampangan", "iso_1_code": null, "iso_3_code": "pam", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1577", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1576", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sambalic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ayta, Ambala", "iso_1_code": null, "iso_3_code": "abc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayta, Abellen", "iso_1_code": null, "iso_3_code": "abp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1580", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayta, Magbukun", "iso_1_code": null, "iso_3_code": "ayt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayta, Mag-Indi", "iso_1_code": null, "iso_3_code": "blx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1582", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sambal, Botolan", "iso_1_code": null, "iso_3_code": "sbl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1583", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ayta, Mag-antsi", "iso_1_code": null, "iso_3_code": "sgb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1584", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bolinao", "iso_1_code": null, "iso_3_code": "smk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1585", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sambal", "iso_1_code": null, "iso_3_code": "xsb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1586", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1578", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sinauna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dumagat, Remontado", "iso_1_code": null, "iso_3_code": "agv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1587", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central-Eastern Malayo-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Barakai", "iso_1_code": null, "iso_3_code": "baj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gwatlelir", "iso_1_code": null, "iso_3_code": "bay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koba", "iso_1_code": null, "iso_3_code": "kpd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1593", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dobel", "iso_1_code": null, "iso_3_code": "kvo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kompane", "iso_1_code": null, "iso_3_code": "kvp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1595", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kola", "iso_1_code": null, "iso_3_code": "kvv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1596", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karey", "iso_1_code": null, "iso_3_code": "kyd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1597", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lola", "iso_1_code": null, "iso_3_code": "lcd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lorang", "iso_1_code": null, "iso_3_code": "lrn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mariri", "iso_1_code": null, "iso_3_code": "mqi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarangan, East", "iso_1_code": null, "iso_3_code": "tre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1601", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarangan, West", "iso_1_code": null, "iso_3_code": "txn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1602", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ujir", "iso_1_code": null, "iso_3_code": "udj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manombai", "iso_1_code": null, "iso_3_code": "woo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Babar, North", "iso_1_code": null, "iso_3_code": "bcd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dawera-Daweloor", "iso_1_code": null, "iso_3_code": "ddw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1608", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dai", "iso_1_code": null, "iso_3_code": "dij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Masela-South Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Masela, West", "iso_1_code": null, "iso_3_code": "mss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masela, Central", "iso_1_code": null, "iso_3_code": "mxz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1613", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Serili", "iso_1_code": null, "iso_3_code": "sve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1614", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Babar, Southeast", "iso_1_code": null, "iso_3_code": "vbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masela, East", "iso_1_code": null, "iso_3_code": "vme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1616", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1611", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Emplawas", "iso_1_code": null, "iso_3_code": "emw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Imroing", "iso_1_code": null, "iso_3_code": "imr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tela-Masbuar", "iso_1_code": null, "iso_3_code": "tvm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1617", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1610", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1605", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bima-Lembata", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Adonara", "iso_1_code": null, "iso_3_code": "adr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1622", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alor", "iso_1_code": null, "iso_3_code": "aol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1623", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bima", "iso_1_code": null, "iso_3_code": "bhp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1624", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ende", "iso_1_code": null, "iso_3_code": "end", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ile Ape", "iso_1_code": null, "iso_3_code": "ila", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kedang", "iso_1_code": null, "iso_3_code": "ksx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1627", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kepo\u2019", "iso_1_code": null, "iso_3_code": "kuk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Komodo", "iso_1_code": null, "iso_3_code": "kvh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1629", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Li\u2019o", "iso_1_code": null, "iso_3_code": "ljl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Painara", "iso_1_code": null, "iso_3_code": "lmf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Labalekan-Mingar", "iso_1_code": null, "iso_3_code": "lmj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamatuka", "iso_1_code": null, "iso_3_code": "lmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1633", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamalera", "iso_1_code": null, "iso_3_code": "lmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Levuka", "iso_1_code": null, "iso_3_code": "lvu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1635", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lewoeleng", "iso_1_code": null, "iso_3_code": "lwe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lewotobi", "iso_1_code": null, "iso_3_code": "lwt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1637", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manggarai", "iso_1_code": null, "iso_3_code": "mqy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1638", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ngad\u2019a, Eastern", "iso_1_code": null, "iso_3_code": "nea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nage", "iso_1_code": null, "iso_3_code": "nxe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngad\u2019a", "iso_1_code": null, "iso_3_code": "nxg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palu\u2019e", "iso_1_code": null, "iso_3_code": "ple", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1642", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rembong", "iso_1_code": null, "iso_3_code": "reb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Riung", "iso_1_code": null, "iso_3_code": "riu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rajong", "iso_1_code": null, "iso_3_code": "rjg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rongga", "iso_1_code": null, "iso_3_code": "ror", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1646", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sika", "iso_1_code": null, "iso_3_code": "ski", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamaholot", "iso_1_code": null, "iso_3_code": "slp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "So\u2019a", "iso_1_code": null, "iso_3_code": "ssq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wae Rana", "iso_1_code": null, "iso_3_code": "wrx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1650", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ke\u2019o", "iso_1_code": null, "iso_3_code": "xxk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central Maluku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ambelau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ambelau", "iso_1_code": null, "iso_3_code": "amv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1654", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lisela", "iso_1_code": null, "iso_3_code": "lcl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1656", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buru", "iso_1_code": null, "iso_3_code": "mhs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moksela", "iso_1_code": null, "iso_3_code": "vms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Manipa", "iso_1_code": null, "iso_3_code": "mqp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Banda-Geser", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Banda", "iso_1_code": null, "iso_3_code": "bnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Geser-Gorom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bati", "iso_1_code": null, "iso_3_code": "bvt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Geser-Gorom", "iso_1_code": null, "iso_3_code": "ges", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Watubela", "iso_1_code": null, "iso_3_code": "wah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bobot", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bobot", "iso_1_code": null, "iso_3_code": "bty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Seram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hoti", "iso_1_code": null, "iso_3_code": "hti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manusela-Seti", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Benggoi", "iso_1_code": null, "iso_3_code": "bgy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Huaulu", "iso_1_code": null, "iso_3_code": "hud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Salas", "iso_1_code": null, "iso_3_code": "sgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Liana-Seti", "iso_1_code": null, "iso_3_code": "ste", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sou Upaa", "iso_1_code": null, "iso_3_code": "wha", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masiwang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Masiwang", "iso_1_code": null, "iso_3_code": "bnf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nunusaku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kayeli", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayeli", "iso_1_code": null, "iso_3_code": "kzl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1682", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piru Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Haruku", "iso_1_code": null, "iso_3_code": "hrk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1684", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaibobo", "iso_1_code": null, "iso_3_code": "kzb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1686", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sepa", "iso_1_code": null, "iso_3_code": "spb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1687", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sou Nama", "iso_1_code": null, "iso_3_code": "tlt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1688", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seram Straits", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ambon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hitu", "iso_1_code": null, "iso_3_code": "htu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1691", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laha", "iso_1_code": null, "iso_3_code": "lhh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tulehu", "iso_1_code": null, "iso_3_code": "tlu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1690", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Solehua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Paulohi", "iso_1_code": null, "iso_3_code": "plh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uliase", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hatuhaha", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Elpaputi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amahai", "iso_1_code": null, "iso_3_code": "amq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nusa Laut", "iso_1_code": null, "iso_3_code": "nul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saparua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Latu", "iso_1_code": null, "iso_3_code": "ltu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1702", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saparua", "iso_1_code": null, "iso_3_code": "spr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kamarian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamarian", "iso_1_code": null, "iso_3_code": "kzx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asilulu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asilulu", "iso_1_code": null, "iso_3_code": "asl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1708", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seit-Kaitetu", "iso_1_code": null, "iso_3_code": "hik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hoamoal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Larike-Wakasihu", "iso_1_code": null, "iso_3_code": "alo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1711", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Boano", "iso_1_code": null, "iso_3_code": "bzn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1712", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1710", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1683", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Three Rivers", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Yalahatan", "iso_1_code": null, "iso_3_code": "jal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1714", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amalumute", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northwest Seram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Luhu", "iso_1_code": null, "iso_3_code": "lcq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lisabata-Nuniali", "iso_1_code": null, "iso_3_code": "lcs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1718", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hulung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hulung", "iso_1_code": null, "iso_3_code": "huk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1720", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1719", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Loun", "iso_1_code": null, "iso_3_code": "lox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1722", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1721", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ulat Inai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Alune", "iso_1_code": null, "iso_3_code": "alp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1724", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Naka\u2019ela", "iso_1_code": null, "iso_3_code": "nae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1725", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1723", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1715", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wemale", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wemale", "iso_1_code": null, "iso_3_code": "weo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1726", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sawai-Nuaulu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nuaulu, North", "iso_1_code": null, "iso_3_code": "nni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuaulu, South", "iso_1_code": null, "iso_3_code": "nxl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saleman", "iso_1_code": null, "iso_3_code": "sau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1728", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1667", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sula", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mangole", "iso_1_code": null, "iso_3_code": "mqc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sula", "iso_1_code": null, "iso_3_code": "szn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1734", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taliabo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kadai", "iso_1_code": null, "iso_3_code": "kzd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1736", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taliabu", "iso_1_code": null, "iso_3_code": "tlv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1737", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1652", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Malayo-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Admiralty Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Manus", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Andra-Hus", "iso_1_code": null, "iso_3_code": "anx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1744", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Elu", "iso_1_code": null, "iso_3_code": "elu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kurti", "iso_1_code": null, "iso_3_code": "ktm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1746", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Koro", "iso_1_code": null, "iso_3_code": "kxr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Leipon", "iso_1_code": null, "iso_3_code": "lek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lele", "iso_1_code": null, "iso_3_code": "lle", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ponam", "iso_1_code": null, "iso_3_code": "ncc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1750", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nali", "iso_1_code": null, "iso_3_code": "nss", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1751", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kele", "iso_1_code": null, "iso_3_code": "sbc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Titan", "iso_1_code": null, "iso_3_code": "ttv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ere", "iso_1_code": null, "iso_3_code": "twp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1743", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mokoreng-Loniu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Loniu", "iso_1_code": null, "iso_3_code": "los", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Idio", "iso_1_code": null, "iso_3_code": "mft", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1755", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bipi", "iso_1_code": null, "iso_3_code": "biq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1759", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Likum", "iso_1_code": null, "iso_3_code": "lib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1760", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyindrou", "iso_1_code": null, "iso_3_code": "lid", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1761", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Hermit", "iso_1_code": null, "iso_3_code": "llf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1762", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mondropolon", "iso_1_code": null, "iso_3_code": "npn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tulu-Bohuai", "iso_1_code": null, "iso_3_code": "rak", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sori-Harengan", "iso_1_code": null, "iso_3_code": "sbh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1765", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khehek", "iso_1_code": null, "iso_3_code": "tlx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1766", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1758", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pak-Tong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pak-Tong", "iso_1_code": null, "iso_3_code": "pkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1768", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1767", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeast Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Paluai", "iso_1_code": null, "iso_3_code": "blq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1770", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lenkau", "iso_1_code": null, "iso_3_code": "ler", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1771", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lou", "iso_1_code": null, "iso_3_code": "loj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Neherneh", "iso_1_code": null, "iso_3_code": "ncn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1773", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Penchal", "iso_1_code": null, "iso_3_code": "pek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1774", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kaniet", "iso_1_code": null, "iso_3_code": "ktk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seimat", "iso_1_code": null, "iso_3_code": "ssg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1777", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wuvulu-Aua", "iso_1_code": null, "iso_3_code": "wuv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1778", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central-Eastern Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Remote Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central Pacific", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Fijian-Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Fijian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Fijian", "iso_1_code": "fj", "iso_3_code": "fij", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1784", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gone Dau", "iso_1_code": null, "iso_3_code": "goo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lauan", "iso_1_code": null, "iso_3_code": "llx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1786", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lomaiviti", "iso_1_code": null, "iso_3_code": "lmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Polynesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rapa", "iso_1_code": null, "iso_3_code": "ray", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1792", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marquesic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hawaiian", "iso_1_code": null, "iso_3_code": "haw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1794", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Marquesan, South", "iso_1_code": null, "iso_3_code": "mqm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marquesan, North", "iso_1_code": null, "iso_3_code": "mrq", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1796", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mangareva", "iso_1_code": null, "iso_3_code": "mrv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1797", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1793", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tahitic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Austral", "iso_1_code": null, "iso_3_code": "aut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maori", "iso_1_code": "mi", "iso_3_code": "mri", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1800", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tuamotuan", "iso_1_code": null, "iso_3_code": "pmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Penrhyn", "iso_1_code": null, "iso_3_code": "pnh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cook Islands Maori", "iso_1_code": null, "iso_3_code": "rar", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1803", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Rakahanga-Manihiki", "iso_1_code": null, "iso_3_code": "rkh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1804", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moriori", "iso_1_code": null, "iso_3_code": "rrm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tahitian", "iso_1_code": "ty", "iso_3_code": "tah", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1806", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1798", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rapanui", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rapa Nui", "iso_1_code": null, "iso_3_code": "rap", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1808", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1807", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samoic-Outlier", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Uvean-Niuafo\u2019ou", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Niuatoputapu", "iso_1_code": null, "iso_3_code": "nkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1811", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Niuafo\u2019ou", "iso_1_code": null, "iso_3_code": "num", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1812", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wallisian", "iso_1_code": null, "iso_3_code": "wls", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1813", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ellicean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kapingamarangi", "iso_1_code": null, "iso_3_code": "kpg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1815", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Takuu", "iso_1_code": null, "iso_3_code": "nho", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1816", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nukuoro", "iso_1_code": null, "iso_3_code": "nkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1817", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nukumanu", "iso_1_code": null, "iso_3_code": "nuq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nukeria", "iso_1_code": null, "iso_3_code": "nur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ontong Java", "iso_1_code": null, "iso_3_code": "ojv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sikaiana", "iso_1_code": null, "iso_3_code": "sky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1821", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuvaluan", "iso_1_code": null, "iso_3_code": "tvl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1822", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1814", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Futunic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Anuta", "iso_1_code": null, "iso_3_code": "aud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Futuna, East", "iso_1_code": null, "iso_3_code": "fud", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1825", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Futuna-Aniwa", "iso_1_code": null, "iso_3_code": "fut", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Emae", "iso_1_code": null, "iso_3_code": "mmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rennell-Bellona", "iso_1_code": null, "iso_3_code": "mnv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mele-Fila", "iso_1_code": null, "iso_3_code": "mxe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vaeakau-Taumako", "iso_1_code": null, "iso_3_code": "piv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tikopia", "iso_1_code": null, "iso_3_code": "tkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fagauvea", "iso_1_code": null, "iso_3_code": "uve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1832", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pukapuka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pukapuka", "iso_1_code": null, "iso_3_code": "pkp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samoan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Samoan", "iso_1_code": "sm", "iso_3_code": "smo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1836", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tokelauan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tokelauan", "iso_1_code": null, "iso_3_code": "tkl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1838", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1809", - "scripts": [], - "own_tokenizer": false - } - ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, + "node_i": "1809", + "native_tokenizers": [], + "scripts": [] + } + ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tongic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Niue", "iso_1_code": null, "iso_3_code": "niu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1840", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tongan", "iso_1_code": "to", "iso_3_code": "ton", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1841", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1788", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Fijian-Rotuman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rotuman", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rotuman", "iso_1_code": null, "iso_3_code": "rtm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1844", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Fijian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Namosi-Naitasiri-Serua", "iso_1_code": null, "iso_3_code": "bwb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fijian, Western", "iso_1_code": null, "iso_3_code": "wyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loyalty Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Drehu", "iso_1_code": null, "iso_3_code": "dhv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1849", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Iaai", "iso_1_code": null, "iso_3_code": "iai", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nengone", "iso_1_code": null, "iso_3_code": "nen", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1848", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Micronesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Micronesian Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ikiribati", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kiribati", "iso_1_code": null, "iso_3_code": "gil", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1855", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kusaiean", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kosraean", "iso_1_code": null, "iso_3_code": "kos", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1857", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marshallese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Marshallese", "iso_1_code": "mh", "iso_3_code": "mah", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1859", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1858", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pohnpeic-Chuukic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chuukic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Carolinian", "iso_1_code": null, "iso_3_code": "cal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chuukese", "iso_1_code": null, "iso_3_code": "chk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1863", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mapia", "iso_1_code": null, "iso_3_code": "mpy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mortlockese", "iso_1_code": null, "iso_3_code": "mrl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1865", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Namonuito", "iso_1_code": null, "iso_3_code": "nmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1866", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "P\u00e1\u00e1fang", "iso_1_code": null, "iso_3_code": "pfa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puluwatese", "iso_1_code": null, "iso_3_code": "puw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sonsorolese", "iso_1_code": null, "iso_3_code": "sov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Satawalese", "iso_1_code": null, "iso_3_code": "stw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tobian", "iso_1_code": null, "iso_3_code": "tox", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1871", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanapag", "iso_1_code": null, "iso_3_code": "tpv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1872", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ulithian", "iso_1_code": null, "iso_3_code": "uli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1873", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Woleaian", "iso_1_code": null, "iso_3_code": "woe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1861", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pohnpeic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mokilese", "iso_1_code": null, "iso_3_code": "mkj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pingelapese", "iso_1_code": null, "iso_3_code": "pif", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1877", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pohnpeian", "iso_1_code": null, "iso_3_code": "pon", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1878", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1875", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1860", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1853", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nauruan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nauruan", "iso_1_code": "na", "iso_3_code": "nau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1880", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1879", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1852", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "New Caledonian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Haekic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Haeke", "iso_1_code": null, "iso_3_code": "aek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1883", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1882", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Haveke", "iso_1_code": null, "iso_3_code": "hvk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1885", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vamale", "iso_1_code": null, "iso_3_code": "mkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1886", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Cemuh\u00ee", "iso_1_code": null, "iso_3_code": "cam", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1888", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paic\u00ee", "iso_1_code": null, "iso_3_code": "pri", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1889", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1887", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Extreme Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Caac", "iso_1_code": null, "iso_3_code": "msq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1891", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "N\u00eal\u00eamwa-Nixumwak", "iso_1_code": null, "iso_3_code": "nee", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1892", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yuanga", "iso_1_code": null, "iso_3_code": "nua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1893", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyel\u00e2yu", "iso_1_code": null, "iso_3_code": "yly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1894", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1890", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pwaamei", "iso_1_code": null, "iso_3_code": "pme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1896", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pwapw\u00e2", "iso_1_code": null, "iso_3_code": "pop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1897", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hmwaveke", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bwatoo", "iso_1_code": null, "iso_3_code": "bwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1899", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hmwaveke", "iso_1_code": null, "iso_3_code": "mrk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1900", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waamwang", "iso_1_code": null, "iso_3_code": "wmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1901", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nemi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Fw\u00e2i", "iso_1_code": null, "iso_3_code": "fwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1903", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jawe", "iso_1_code": null, "iso_3_code": "jaz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1904", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nemi", "iso_1_code": null, "iso_3_code": "nem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1905", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pije", "iso_1_code": null, "iso_3_code": "piz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1906", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1902", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1895", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1884", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Extreme Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Drubea", "iso_1_code": null, "iso_3_code": "duf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1909", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Num\u00e8\u00e8", "iso_1_code": null, "iso_3_code": "kdk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1910", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1908", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Wailic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aji\u00eb", "iso_1_code": null, "iso_3_code": "aji", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1913", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Arh\u00f6", "iso_1_code": null, "iso_3_code": "aok", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1914", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arh\u00e2", "iso_1_code": null, "iso_3_code": "aqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Orowe", "iso_1_code": null, "iso_3_code": "bpk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Neku", "iso_1_code": null, "iso_3_code": "nek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1912", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xaracuu-Xaragure", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "X\u00e2r\u00e2c\u00f9\u00f9", "iso_1_code": null, "iso_3_code": "ane", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "X\u00e2r\u00e2gur\u00e8", "iso_1_code": null, "iso_3_code": "axx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zire-Tiri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "T\u00eer\u00ee", "iso_1_code": null, "iso_3_code": "cir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "S\u00eesh\u00eb\u00eb", "iso_1_code": null, "iso_3_code": "sih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1911", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1907", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1881", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North and Central Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Santo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "N\u2019kep", "iso_1_code": null, "iso_3_code": "sku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1926", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Butmas-Tur", "iso_1_code": null, "iso_3_code": "bnr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1929", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lorediakarkar", "iso_1_code": null, "iso_3_code": "lnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1930", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Atin", "iso_1_code": null, "iso_3_code": "plb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngen", "iso_1_code": null, "iso_3_code": "ssv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1932", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1928", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1925", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malekula Interior", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Labo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ninde", "iso_1_code": null, "iso_3_code": "mwi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1935", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1934", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malekula Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Neverver", "iso_1_code": null, "iso_3_code": "lgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1937", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Larevat", "iso_1_code": null, "iso_3_code": "lrv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1938", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Litzlitz", "iso_1_code": null, "iso_3_code": "lzl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1939", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maragus", "iso_1_code": null, "iso_3_code": "mrs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "V\u2019\u00ebnen Taut", "iso_1_code": null, "iso_3_code": "nmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nasarian", "iso_1_code": null, "iso_3_code": "nvh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Avava", "iso_1_code": null, "iso_3_code": "tmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1943", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Neve\u2019ei", "iso_1_code": null, "iso_3_code": "vnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1936", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Small Nambas", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dixon Reef", "iso_1_code": null, "iso_3_code": "dix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1946", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Letemboi", "iso_1_code": null, "iso_3_code": "nms", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1947", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Repanbitip", "iso_1_code": null, "iso_3_code": "rpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1945", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1933", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northeast Vanuatu-Banks Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Efate, South", "iso_1_code": null, "iso_3_code": "erk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1951", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eton", "iso_1_code": null, "iso_3_code": "etn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1952", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Efate, North", "iso_1_code": null, "iso_3_code": "llp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1953", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lelepa", "iso_1_code": null, "iso_3_code": "lpa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1954", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Namakura", "iso_1_code": null, "iso_3_code": "nmk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1955", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1950", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Apma", "iso_1_code": null, "iso_3_code": "app", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1957", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Daakaka", "iso_1_code": null, "iso_3_code": "bpa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1958", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baetora", "iso_1_code": null, "iso_3_code": "btr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lonwolwol", "iso_1_code": null, "iso_3_code": "crc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fanbak", "iso_1_code": null, "iso_3_code": "fnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1961", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hiw", "iso_1_code": null, "iso_3_code": "hiw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1962", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koro", "iso_1_code": null, "iso_3_code": "krf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1963", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lo-Toga", "iso_1_code": null, "iso_3_code": "lht", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1964", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lakon", "iso_1_code": null, "iso_3_code": "lkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1965", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hano", "iso_1_code": null, "iso_3_code": "lml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1966", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lemerig", "iso_1_code": null, "iso_3_code": "lrz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1967", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mwotlap", "iso_1_code": null, "iso_3_code": "mlv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1968", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambrym, North", "iso_1_code": null, "iso_3_code": "mmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marino", "iso_1_code": null, "iso_3_code": "mrb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mwerlap", "iso_1_code": null, "iso_3_code": "mrm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1971", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vur\u00ebs", "iso_1_code": null, "iso_3_code": "msn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1972", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mota", "iso_1_code": null, "iso_3_code": "mtt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1973", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maewo, Central", "iso_1_code": null, "iso_3_code": "mwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambae, West", "iso_1_code": null, "iso_3_code": "nnd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1975", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Olrat", "iso_1_code": null, "iso_3_code": "olr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1976", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambae, East", "iso_1_code": null, "iso_3_code": "omb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1977", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Paama", "iso_1_code": null, "iso_3_code": "pma", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1978", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Port Vato", "iso_1_code": null, "iso_3_code": "ptv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1979", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sa", "iso_1_code": null, "iso_3_code": "sax", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1980", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ske", "iso_1_code": null, "iso_3_code": "ske", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1981", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sowa", "iso_1_code": null, "iso_3_code": "sww", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1982", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nume", "iso_1_code": null, "iso_3_code": "tgs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1983", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lehali", "iso_1_code": null, "iso_3_code": "tql", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambrym, Southeast", "iso_1_code": null, "iso_3_code": "tvk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1985", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "L\u00f6y\u00f6p", "iso_1_code": null, "iso_3_code": "urr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1986", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vera\u2019a", "iso_1_code": null, "iso_3_code": "vra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1987", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dorig", "iso_1_code": null, "iso_3_code": "wwo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1988", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1956", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Epi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bieria-Maii", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bieria", "iso_1_code": null, "iso_3_code": "brj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1991", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maii", "iso_1_code": null, "iso_3_code": "mmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamenu-Baki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Baki-Bierebo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Baki", "iso_1_code": null, "iso_3_code": "bki", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1995", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bierebo", "iso_1_code": null, "iso_3_code": "bnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1996", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamenu-Lewo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lamenu", "iso_1_code": null, "iso_3_code": "lmu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1998", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lewo", "iso_1_code": null, "iso_3_code": "lww", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1999", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1997", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1993", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malekula Coastal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Axamb", "iso_1_code": null, "iso_3_code": "ahb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2001", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aulua", "iso_1_code": null, "iso_3_code": "aul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2002", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maskelynes", "iso_1_code": null, "iso_3_code": "klv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2003", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malua Bay", "iso_1_code": null, "iso_3_code": "mll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Na\u2019ahai", "iso_1_code": null, "iso_3_code": "mlx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2005", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mae", "iso_1_code": null, "iso_3_code": "mme", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2006", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mpotovoro", "iso_1_code": null, "iso_3_code": "mvt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2007", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unua", "iso_1_code": null, "iso_3_code": "onu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2008", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rerep", "iso_1_code": null, "iso_3_code": "pgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Port Sandwich", "iso_1_code": null, "iso_3_code": "psw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nahavaq", "iso_1_code": null, "iso_3_code": "sns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2011", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uripiv-Wala-Rano-Atchin", "iso_1_code": null, "iso_3_code": "upv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2012", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Vao", "iso_1_code": null, "iso_3_code": "vao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2013", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Banam Bay", "iso_1_code": null, "iso_3_code": "vrt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2014", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Santo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Soro-n Raki", "iso_1_code": null, "iso_3_code": "akr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2016", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amblong", "iso_1_code": null, "iso_3_code": "alm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aore", "iso_1_code": null, "iso_3_code": "aor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiae", "iso_1_code": null, "iso_3_code": "frt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2019", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Merei", "iso_1_code": null, "iso_3_code": "lmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mafea", "iso_1_code": null, "iso_3_code": "mkv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2021", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malo", "iso_1_code": null, "iso_3_code": "mla", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2022", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiale", "iso_1_code": null, "iso_3_code": "mnl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2023", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Morouas", "iso_1_code": null, "iso_3_code": "mrp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2024", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanokuku", "iso_1_code": null, "iso_3_code": "nkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2025", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Varsaf", "iso_1_code": null, "iso_3_code": "nrg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2026", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Balen", "iso_1_code": null, "iso_3_code": "nsw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2027", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tapiafaru", "iso_1_code": null, "iso_3_code": "ptr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2028", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mores", "iso_1_code": null, "iso_3_code": "rga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2029", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tangoa", "iso_1_code": null, "iso_3_code": "tgp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2030", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tolomako", "iso_1_code": null, "iso_3_code": "tlm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tambotalo", "iso_1_code": null, "iso_3_code": "tls", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vinekula", "iso_1_code": null, "iso_3_code": "tmi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2033", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oa", "iso_1_code": null, "iso_3_code": "tmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2034", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Akei", "iso_1_code": null, "iso_3_code": "tsr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tavanlav", "iso_1_code": null, "iso_3_code": "vlp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tapesena", "iso_1_code": null, "iso_3_code": "vnp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2037", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moiso", "iso_1_code": null, "iso_3_code": "wlr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jo", "iso_1_code": null, "iso_3_code": "wsi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1924", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1780", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Vanuatu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aneityum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aneityum", "iso_1_code": null, "iso_3_code": "aty", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2042", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2041", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Erromanga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sie", "iso_1_code": null, "iso_3_code": "erg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2044", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ifo", "iso_1_code": null, "iso_3_code": "iff", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2045", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ura", "iso_1_code": null, "iso_3_code": "uur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2043", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanna", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tanna, Southwest", "iso_1_code": null, "iso_3_code": "nwi", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2048", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kwamera", "iso_1_code": null, "iso_3_code": "tnk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2049", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lenakel", "iso_1_code": null, "iso_3_code": "tnl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanna, North", "iso_1_code": null, "iso_3_code": "tnn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2051", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Whitesands", "iso_1_code": null, "iso_3_code": "tnp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2052", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2047", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2040", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeast Solomonic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gela-Guadalcanal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bughotu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bughotu", "iso_1_code": null, "iso_3_code": "bgt", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2056", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gela", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lengo", "iso_1_code": null, "iso_3_code": "lgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2058", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gela", "iso_1_code": null, "iso_3_code": "nlg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2059", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2057", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Guadalcanal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Birao", "iso_1_code": null, "iso_3_code": "brr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2061", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ghari", "iso_1_code": null, "iso_3_code": "gri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2062", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malango", "iso_1_code": null, "iso_3_code": "mln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2063", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Talise", "iso_1_code": null, "iso_3_code": "tlr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2064", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malaita-San Cristobal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Malaita", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Longgu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Longgu", "iso_1_code": null, "iso_3_code": "lgu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2068", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2067", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Baelelea", "iso_1_code": null, "iso_3_code": "bvc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2070", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Baeggu", "iso_1_code": null, "iso_3_code": "bvd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2071", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Fataleka", "iso_1_code": null, "iso_3_code": "far", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2072", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gula\u2019alaa", "iso_1_code": null, "iso_3_code": "gmb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2073", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kwaio", "iso_1_code": null, "iso_3_code": "kwd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2074", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kwara\u2019ae", "iso_1_code": null, "iso_3_code": "kwf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2075", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wala", "iso_1_code": null, "iso_3_code": "lgl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2076", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lau", "iso_1_code": null, "iso_3_code": "llu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2077", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "To\u2019abaita", "iso_1_code": null, "iso_3_code": "mlu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2078", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2069", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "\u2019Are\u2019are", "iso_1_code": null, "iso_3_code": "alu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2080", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sa\ua78ca", "iso_1_code": null, "iso_3_code": "apb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2081", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dori\u2019o", "iso_1_code": null, "iso_3_code": "dor", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2082", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oroha", "iso_1_code": null, "iso_3_code": "ora", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2083", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2066", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "San Cristobal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kahua", "iso_1_code": null, "iso_3_code": "agw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2085", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Arosi", "iso_1_code": null, "iso_3_code": "aia", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2086", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bauro", "iso_1_code": null, "iso_3_code": "bxa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2087", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fagani", "iso_1_code": null, "iso_3_code": "faf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2088", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Owa", "iso_1_code": null, "iso_3_code": "stn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2089", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2065", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1779", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "St. Matthias", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mussau-Emira", "iso_1_code": null, "iso_3_code": "emi", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2091", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tenis", "iso_1_code": null, "iso_3_code": "tns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2092", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Temotu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Reefs-Santa Cruz", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "\u00c4iwoo", "iso_1_code": null, "iso_3_code": "nfl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2095", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Engdewu", "iso_1_code": null, "iso_3_code": "ngr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nal\u00f6go", "iso_1_code": null, "iso_3_code": "nlz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2097", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Noip\u00e4", "iso_1_code": null, "iso_3_code": "npx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2098", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nat\u00fcgu", "iso_1_code": null, "iso_3_code": "ntu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2099", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Utupua-Vanikoro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Teanu", "iso_1_code": null, "iso_3_code": "tkw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanema", "iso_1_code": null, "iso_3_code": "tnx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2102", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lovono", "iso_1_code": null, "iso_3_code": "vnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2103", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Utupua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Asumboa", "iso_1_code": null, "iso_3_code": "aua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tanibili", "iso_1_code": null, "iso_3_code": "tbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amba", "iso_1_code": null, "iso_3_code": "utp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2093", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Oceanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Meso Melanesian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bali-Vitu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Uneapa", "iso_1_code": null, "iso_3_code": "bbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2111", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vitu", "iso_1_code": null, "iso_3_code": "wiv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2112", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "New Ireland", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lavongai-Nalik", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Tungag", "iso_1_code": null, "iso_3_code": "lcm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2115", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kara", "iso_1_code": null, "iso_3_code": "leu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2116", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lakurumau", "iso_1_code": null, "iso_3_code": "lxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nalik", "iso_1_code": null, "iso_3_code": "nal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandara", "iso_1_code": null, "iso_3_code": "tbf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2119", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tiang", "iso_1_code": null, "iso_3_code": "tbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tigak", "iso_1_code": null, "iso_3_code": "tgc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2114", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Madak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Barok", "iso_1_code": null, "iso_3_code": "bjk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lavatbura-Lamusong", "iso_1_code": null, "iso_3_code": "lbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Madak", "iso_1_code": null, "iso_3_code": "mmx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2125", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South New Ireland-Northwest Solomonic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Minigir", "iso_1_code": null, "iso_3_code": "bxf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Choiseul", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Babatana", "iso_1_code": null, "iso_3_code": "baa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ririo", "iso_1_code": null, "iso_3_code": "rri", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vaghua", "iso_1_code": null, "iso_3_code": "tva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Varisi", "iso_1_code": null, "iso_3_code": "vrs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mono-Uruava", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mono", "iso_1_code": null, "iso_3_code": "mte", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Torau", "iso_1_code": null, "iso_3_code": "ttu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2135", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uruava", "iso_1_code": null, "iso_3_code": "urv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2136", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vinitiri", "iso_1_code": null, "iso_3_code": "vmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2133", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nehan-North Bougainville", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Buka", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Petats", "iso_1_code": null, "iso_3_code": "pex", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Halia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hak\u00f6", "iso_1_code": null, "iso_3_code": "hao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Halia", "iso_1_code": null, "iso_3_code": "hla", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2143", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2141", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2139", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nehan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nehan", "iso_1_code": null, "iso_3_code": "nsn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2145", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2144", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papapana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Papapana", "iso_1_code": null, "iso_3_code": "ppn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2147", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saposa-Tinputz", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hahon", "iso_1_code": null, "iso_3_code": "hah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2149", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saposa", "iso_1_code": null, "iso_3_code": "sps", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2150", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Teop", "iso_1_code": null, "iso_3_code": "tio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tinputz", "iso_1_code": null, "iso_3_code": "tpz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2152", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Solos", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Solos", "iso_1_code": null, "iso_3_code": "sol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2154", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2153", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "New Georgia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Vangunu", "iso_1_code": null, "iso_3_code": "mpr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marovo", "iso_1_code": null, "iso_3_code": "mvo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2156", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ghanongga", "iso_1_code": null, "iso_3_code": "ghn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2160", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hoava", "iso_1_code": null, "iso_3_code": "hoa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2161", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kusaghe", "iso_1_code": null, "iso_3_code": "ksg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kazukuru", "iso_1_code": null, "iso_3_code": "kzk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lungga", "iso_1_code": null, "iso_3_code": "lga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2164", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dughore", "iso_1_code": null, "iso_3_code": "nke", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roviana", "iso_1_code": null, "iso_3_code": "rug", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2166", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Simbo", "iso_1_code": null, "iso_3_code": "sbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2167", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ughele", "iso_1_code": null, "iso_3_code": "uge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Patpatar-Tolai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Fanamaket", "iso_1_code": null, "iso_3_code": "bjp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2170", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Patpatar", "iso_1_code": null, "iso_3_code": "gfk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2171", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Guramalum", "iso_1_code": null, "iso_3_code": "grz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Niwer Mil", "iso_1_code": null, "iso_3_code": "hrc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2173", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warwar Feni", "iso_1_code": null, "iso_3_code": "hrw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konomala", "iso_1_code": null, "iso_3_code": "koa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2175", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kandas", "iso_1_code": null, "iso_3_code": "kqw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2176", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kuanua", "iso_1_code": null, "iso_3_code": "ksd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2177", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Label", "iso_1_code": null, "iso_3_code": "lbb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2178", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ramoaaina", "iso_1_code": null, "iso_3_code": "rai", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2179", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sursurunga", "iso_1_code": null, "iso_3_code": "sgz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2180", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Siar-Lak", "iso_1_code": null, "iso_3_code": "sjr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Piva-Banoni", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bannoni", "iso_1_code": null, "iso_3_code": "bcm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lawunuia", "iso_1_code": null, "iso_3_code": "tgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2182", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Santa Isabel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Blablanga", "iso_1_code": null, "iso_3_code": "blp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Zazao", "iso_1_code": null, "iso_3_code": "jaj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kokota", "iso_1_code": null, "iso_3_code": "kkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2186", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gao", "iso_1_code": null, "iso_3_code": "gga", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cheke Holo", "iso_1_code": null, "iso_3_code": "mrn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Zabana", "iso_1_code": null, "iso_3_code": "kji", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laghu", "iso_1_code": null, "iso_3_code": "lgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2193", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2126", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tabar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lihir", "iso_1_code": null, "iso_3_code": "lih", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Notsi", "iso_1_code": null, "iso_3_code": "ncf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2196", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tomoip", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tomoip", "iso_1_code": null, "iso_3_code": "tqp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2113", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Willaumez", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bulu", "iso_1_code": null, "iso_3_code": "bjl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2202", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bola", "iso_1_code": null, "iso_3_code": "bnp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2203", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Meramera", "iso_1_code": null, "iso_3_code": "mxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nakanai", "iso_1_code": null, "iso_3_code": "nak", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2205", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2109", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Huon Gulf", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Markham", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lower", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Busu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Duwet", "iso_1_code": null, "iso_3_code": "gve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aribwatsa", "iso_1_code": null, "iso_3_code": "laz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Musom", "iso_1_code": null, "iso_3_code": "msu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2213", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nafi", "iso_1_code": null, "iso_3_code": "srf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aribwaung", "iso_1_code": null, "iso_3_code": "ylu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Labu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Labu", "iso_1_code": null, "iso_3_code": "lbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2217", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2216", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wampar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Wampar", "iso_1_code": null, "iso_3_code": "lbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2219", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Upper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Adzera", "iso_1_code": null, "iso_3_code": "adz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2221", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mountain", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mari", "iso_1_code": null, "iso_3_code": "hob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wampur", "iso_1_code": null, "iso_3_code": "waz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2224", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sarasira", "iso_1_code": null, "iso_3_code": "zsa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sukurum", "iso_1_code": null, "iso_3_code": "zsu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2220", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Watut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kodut, South", "iso_1_code": null, "iso_3_code": "mcy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kodut, Middle", "iso_1_code": null, "iso_3_code": "mpl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2229", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kodut, North", "iso_1_code": null, "iso_3_code": "una", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bugawac", "iso_1_code": null, "iso_3_code": "buk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2232", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yabem", "iso_1_code": null, "iso_3_code": "jae", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2233", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kala", "iso_1_code": null, "iso_3_code": "kcl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2234", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Numbami", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Numbami", "iso_1_code": null, "iso_3_code": "sij", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hote-Buang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Buang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Buang, Central", "iso_1_code": null, "iso_3_code": "bzh", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2240", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Buang, Mangga", "iso_1_code": null, "iso_3_code": "mmo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2241", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Piu", "iso_1_code": null, "iso_3_code": "pix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2242", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kapin", "iso_1_code": null, "iso_3_code": "tbx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vehes", "iso_1_code": null, "iso_3_code": "val", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mumeng", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dambi", "iso_1_code": null, "iso_3_code": "dac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gorakor", "iso_1_code": null, "iso_3_code": "goc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kumalu", "iso_1_code": null, "iso_3_code": "ksl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Patep", "iso_1_code": null, "iso_3_code": "ptp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2249", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Zenag", "iso_1_code": null, "iso_3_code": "zeg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hote", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Malei", "iso_1_code": null, "iso_3_code": "hot", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2252", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yamap", "iso_1_code": null, "iso_3_code": "ymp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2238", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaiwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Iwal", "iso_1_code": null, "iso_3_code": "kbm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2255", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngero-Vitiaz", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ngero", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bariai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bariai", "iso_1_code": null, "iso_3_code": "bch", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2259", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lusi", "iso_1_code": null, "iso_3_code": "khl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kove", "iso_1_code": null, "iso_3_code": "kvc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mala", "iso_1_code": null, "iso_3_code": "mmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tuam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gitua", "iso_1_code": null, "iso_3_code": "ggt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2264", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mutu", "iso_1_code": null, "iso_3_code": "tuc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2265", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "2263", - "scripts": [], - "own_tokenizer": false - } + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, + "node_i": "2263", + "native_tokenizers": [], + "scripts": [] + } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vitiaz", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Astrolabe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Awad Bing", "iso_1_code": null, "iso_3_code": "bcu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2269", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mindiri", "iso_1_code": null, "iso_3_code": "mpn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yote", "iso_1_code": null, "iso_3_code": "wab", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2271", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear Bel", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bilbil", "iso_1_code": null, "iso_3_code": "brz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gedaged", "iso_1_code": null, "iso_3_code": "gdd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2275", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matukar Panau", "iso_1_code": null, "iso_3_code": "mjk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Takia", "iso_1_code": null, "iso_3_code": "tbc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2277", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2273", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Marik", "iso_1_code": null, "iso_3_code": "dad", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2279", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kilenge-Maleu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maleu-Kilenge", "iso_1_code": null, "iso_3_code": "mgl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korap", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Arop-Lokep", "iso_1_code": null, "iso_3_code": "apr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2283", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Karnai", "iso_1_code": null, "iso_3_code": "bbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2284", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pano", "iso_1_code": null, "iso_3_code": "mqz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2285", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mur Pano", "iso_1_code": null, "iso_3_code": "tkv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mangap-Mbula", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mbula", "iso_1_code": null, "iso_3_code": "mna", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2288", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2287", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mengen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mamusi", "iso_1_code": null, "iso_3_code": "kdf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2290", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mengen", "iso_1_code": null, "iso_3_code": "mee", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2291", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lote", "iso_1_code": null, "iso_3_code": "uvl", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2292", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roinji-Nenaya", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mato", "iso_1_code": null, "iso_3_code": "met", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2294", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Uma", "iso_1_code": null, "iso_3_code": "roe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sio", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sio", "iso_1_code": null, "iso_3_code": "xsi", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2297", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest New Britain", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Amara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Amara", "iso_1_code": null, "iso_3_code": "aie", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2299", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Arawe-Pasismanua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Arawe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mangseng", "iso_1_code": null, "iso_3_code": "mbh", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2303", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "East Arawe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Akolet", "iso_1_code": null, "iso_3_code": "akt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Avau", "iso_1_code": null, "iso_3_code": "avb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bebeli", "iso_1_code": null, "iso_3_code": "bek", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2307", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Amio-Gelimi", "iso_1_code": null, "iso_3_code": "let", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2308", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Arawe", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Solong", "iso_1_code": null, "iso_3_code": "aaw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2310", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambul", "iso_1_code": null, "iso_3_code": "apo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2311", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gimi", "iso_1_code": null, "iso_3_code": "gip", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aiklep", "iso_1_code": null, "iso_3_code": "mwg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pasismanua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aighon", "iso_1_code": null, "iso_3_code": "aix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2315", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Miu", "iso_1_code": null, "iso_3_code": "mpo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaulong", "iso_1_code": null, "iso_3_code": "pss", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2317", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sengseng", "iso_1_code": null, "iso_3_code": "ssz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karore", "iso_1_code": null, "iso_3_code": "xkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2314", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2301", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bibling", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lamogai", "iso_1_code": null, "iso_3_code": "lmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mouk-Aria", "iso_1_code": null, "iso_3_code": "mwh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2298", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tami", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tami", "iso_1_code": null, "iso_3_code": "tmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sarmi-Jayapura Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jayapura Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayupulau", "iso_1_code": null, "iso_3_code": "kzu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ormu", "iso_1_code": null, "iso_3_code": "orz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tobati", "iso_1_code": null, "iso_3_code": "tti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sarmi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Anus", "iso_1_code": null, "iso_3_code": "auq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bonggo", "iso_1_code": null, "iso_3_code": "bpg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Masimasi", "iso_1_code": null, "iso_3_code": "ism", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaptiau", "iso_1_code": null, "iso_3_code": "kbi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Liki", "iso_1_code": null, "iso_3_code": "lio", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Fedan", "iso_1_code": null, "iso_3_code": "pdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2336", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sobei", "iso_1_code": null, "iso_3_code": "sob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tarpia", "iso_1_code": null, "iso_3_code": "tpf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mo", "iso_1_code": null, "iso_3_code": "wkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sunum", "iso_1_code": null, "iso_3_code": "ymn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yarsun", "iso_1_code": null, "iso_3_code": "yrs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2330", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Schouten", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kairiru-Manam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kairiru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kaiep", "iso_1_code": null, "iso_3_code": "kbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2345", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kairiru", "iso_1_code": null, "iso_3_code": "kxa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Terebu", "iso_1_code": null, "iso_3_code": "trb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2344", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Biem", "iso_1_code": null, "iso_3_code": "bmc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kis", "iso_1_code": null, "iso_3_code": "kis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Medebur", "iso_1_code": null, "iso_3_code": "mjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manam", "iso_1_code": null, "iso_3_code": "mva", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2352", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sepa", "iso_1_code": null, "iso_3_code": "spe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wogeo", "iso_1_code": null, "iso_3_code": "woc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Siau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Orop", "iso_1_code": null, "iso_3_code": "aps", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malol", "iso_1_code": null, "iso_3_code": "mbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sera", "iso_1_code": null, "iso_3_code": "sry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2358", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Essono", "iso_1_code": null, "iso_3_code": "sso", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2359", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ulau-Suain", "iso_1_code": null, "iso_3_code": "svb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tumleo", "iso_1_code": null, "iso_3_code": "tmq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kap", "iso_1_code": null, "iso_3_code": "ykm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papuan Tip", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Maisin", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maisin", "iso_1_code": null, "iso_3_code": "mbq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Papuan Mainland-D\u2019Entrecasteaux", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Anuki", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Anuki", "iso_1_code": null, "iso_3_code": "aui", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2369", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Are-Taupota", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Are", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Miniafia Oyan", "iso_1_code": null, "iso_3_code": "aai", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2372", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ghayavi", "iso_1_code": null, "iso_3_code": "bmk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2373", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Doga", "iso_1_code": null, "iso_3_code": "dgg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2374", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Are", "iso_1_code": null, "iso_3_code": "mwc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2375", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gapapaiwa", "iso_1_code": null, "iso_3_code": "pwg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2376", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ubir", "iso_1_code": null, "iso_3_code": "ubr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2377", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kaninuwa", "iso_1_code": null, "iso_3_code": "wat", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2378", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taupota", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gweda", "iso_1_code": null, "iso_3_code": "grw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Haigwai", "iso_1_code": null, "iso_3_code": "hgw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maiwala", "iso_1_code": null, "iso_3_code": "mum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minaveha", "iso_1_code": null, "iso_3_code": "mvn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2383", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tawala", "iso_1_code": null, "iso_3_code": "tbo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2384", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Taupota", "iso_1_code": null, "iso_3_code": "tpa", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2385", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wa\u2019ema", "iso_1_code": null, "iso_3_code": "wag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wedau", "iso_1_code": null, "iso_3_code": "wed", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2387", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yakaikeke", "iso_1_code": null, "iso_3_code": "ykk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2379", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2370", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bwaidoga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bwaidoka", "iso_1_code": null, "iso_3_code": "bwd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2390", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Goodenough, West", "iso_1_code": null, "iso_3_code": "ddi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koluwawa", "iso_1_code": null, "iso_3_code": "klx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Molima", "iso_1_code": null, "iso_3_code": "mox", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2393", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Maiadomu", "iso_1_code": null, "iso_3_code": "mzz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2394", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Iduna", "iso_1_code": null, "iso_3_code": "viv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2395", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Iamalele", "iso_1_code": null, "iso_3_code": "yml", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2396", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2389", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dobu-Duau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bunama", "iso_1_code": null, "iso_3_code": "bdd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2398", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Boselewa", "iso_1_code": null, "iso_3_code": "bwf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dobu", "iso_1_code": null, "iso_3_code": "dob", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2400", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Duau", "iso_1_code": null, "iso_3_code": "dva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2401", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Galeya", "iso_1_code": null, "iso_3_code": "gar", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mwatebu", "iso_1_code": null, "iso_3_code": "mwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sewa Bay", "iso_1_code": null, "iso_3_code": "sew", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2404", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2397", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gumawana", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gumawana", "iso_1_code": null, "iso_3_code": "gvs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kakabai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dawawa", "iso_1_code": null, "iso_3_code": "dww", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2408", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kakabai", "iso_1_code": null, "iso_3_code": "kqf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2409", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "2367", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Suauic", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "2367", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Suauic", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Buhutu", "iso_1_code": null, "iso_3_code": "bxh", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2411", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "\u2019Auhelawa", "iso_1_code": null, "iso_3_code": "kud", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2412", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Oya\u2019oya", "iso_1_code": null, "iso_3_code": "oyy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saliba", "iso_1_code": null, "iso_3_code": "sbe", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2414", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Suau", "iso_1_code": null, "iso_3_code": "swp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2415", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bwanabwana", "iso_1_code": null, "iso_3_code": "tte", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2416", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Unubahe", "iso_1_code": null, "iso_3_code": "unu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wagawaga", "iso_1_code": null, "iso_3_code": "wgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaleba", "iso_1_code": null, "iso_3_code": "ylb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Peripheral", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Oumic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ouma", "iso_1_code": null, "iso_3_code": "oum", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Magoric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bina", "iso_1_code": null, "iso_3_code": "bmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yoba", "iso_1_code": null, "iso_3_code": "yob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Magori", "iso_1_code": null, "iso_3_code": "zgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2422", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sinagoro-Keapara", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Vula\u2019a", "iso_1_code": null, "iso_3_code": "hul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Keapara", "iso_1_code": null, "iso_3_code": "khz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2430", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Motu", "iso_1_code": null, "iso_3_code": "meu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2431", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sinaugoro", "iso_1_code": null, "iso_3_code": "snc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2432", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Central Papuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Gabadi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abadi", "iso_1_code": null, "iso_3_code": "kbt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Toura", "iso_1_code": null, "iso_3_code": "don", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2437", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuni", "iso_1_code": null, "iso_3_code": "kse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2438", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mekeo", "iso_1_code": null, "iso_3_code": "mek", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2439", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lala", "iso_1_code": null, "iso_3_code": "nrz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2440", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waima", "iso_1_code": null, "iso_3_code": "rro", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2441", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2421", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kilivila-Louisiades", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kilivila", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Budibud", "iso_1_code": null, "iso_3_code": "btp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2444", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kilivila", "iso_1_code": null, "iso_3_code": "kij", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2445", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Muyuw", "iso_1_code": null, "iso_3_code": "myw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2446", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2443", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Misima", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Misima-Panaeati", "iso_1_code": null, "iso_3_code": "mpx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2448", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2447", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nimoa-Sudest", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rifao", "iso_1_code": null, "iso_3_code": "nmw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2450", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sudest", "iso_1_code": null, "iso_3_code": "tgo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2451", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2449", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2442", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2363", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2108", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yapese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Yapese", "iso_1_code": null, "iso_3_code": "yap", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2453", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Halmahera-West New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "South Halmahera", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Irarutu", "iso_1_code": null, "iso_3_code": "irh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2456", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Makian-Gane", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gane", "iso_1_code": null, "iso_3_code": "gzn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2458", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makian, East", "iso_1_code": null, "iso_3_code": "mky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2459", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2457", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Buli", "iso_1_code": null, "iso_3_code": "bzq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2461", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maba", "iso_1_code": null, "iso_3_code": "mqa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2462", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Patani", "iso_1_code": null, "iso_3_code": "ptn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2463", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sawai", "iso_1_code": null, "iso_3_code": "szw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2464", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2460", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2455", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West New Guinea", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bedoanas", "iso_1_code": null, "iso_3_code": "bed", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2467", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Erokwanas", "iso_1_code": null, "iso_3_code": "erw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2468", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2466", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cenderawasih Bay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Biakic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Biak", "iso_1_code": null, "iso_3_code": "bhw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2471", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dusner", "iso_1_code": null, "iso_3_code": "dsn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2472", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Meoswar", "iso_1_code": null, "iso_3_code": "mvx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2473", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2470", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iresim", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yeresiam", "iso_1_code": null, "iso_3_code": "ire", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2475", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2474", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mor", "iso_1_code": null, "iso_3_code": "mhz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2477", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2476", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Raja Ampat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "As", "iso_1_code": null, "iso_3_code": "asz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2479", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Biga", "iso_1_code": null, "iso_3_code": "bhc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2480", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gebe", "iso_1_code": null, "iso_3_code": "gei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2481", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kawe", "iso_1_code": null, "iso_3_code": "kgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2482", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Legenyem", "iso_1_code": null, "iso_3_code": "lcc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2483", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ma\u2019ya", "iso_1_code": null, "iso_3_code": "slz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2484", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ambel", "iso_1_code": null, "iso_3_code": "wgo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2485", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wauyai", "iso_1_code": null, "iso_3_code": "wuy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2486", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Matbat", "iso_1_code": null, "iso_3_code": "xmt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2487", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Salawati", "iso_1_code": null, "iso_3_code": "xmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2488", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2478", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tandia", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tandia", "iso_1_code": null, "iso_3_code": "tni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2490", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2489", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waropen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Waropen", "iso_1_code": null, "iso_3_code": "wrp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2492", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2491", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yapen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central-Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ambai", "iso_1_code": null, "iso_3_code": "amk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2495", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ansus", "iso_1_code": null, "iso_3_code": "and", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2496", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Busami", "iso_1_code": null, "iso_3_code": "bsm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2497", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munggui", "iso_1_code": null, "iso_3_code": "mth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2498", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Marau", "iso_1_code": null, "iso_3_code": "mvr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2499", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pom", "iso_1_code": null, "iso_3_code": "pmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2500", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papuma", "iso_1_code": null, "iso_3_code": "ppm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2501", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roon", "iso_1_code": null, "iso_3_code": "rnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2502", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Serui-Laut", "iso_1_code": null, "iso_3_code": "seu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2503", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wamesa", "iso_1_code": null, "iso_3_code": "wad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2504", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Woi", "iso_1_code": null, "iso_3_code": "wbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2505", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2494", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kurudu", "iso_1_code": null, "iso_3_code": "kjr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2507", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wabo", "iso_1_code": null, "iso_3_code": "wbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2508", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2506", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2493", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yaur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yaur", "iso_1_code": null, "iso_3_code": "jau", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2510", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2509", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yeretuar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yeretuar", "iso_1_code": null, "iso_3_code": "gop", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2512", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2511", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2469", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2465", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2454", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hukumina", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hukumina", "iso_1_code": null, "iso_3_code": "huw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2514", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2513", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arguni", "iso_1_code": null, "iso_3_code": "agf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2516", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Onin", "iso_1_code": null, "iso_3_code": "oni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2517", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sekar", "iso_1_code": null, "iso_3_code": "skz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2518", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uruangnirin", "iso_1_code": null, "iso_3_code": "urn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2519", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2515", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Bomberai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kowiai", "iso_1_code": null, "iso_3_code": "kwh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2521", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2520", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southeast Maluku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kei-Tanimbar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kei-Fordata", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Fordata", "iso_1_code": null, "iso_3_code": "frd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2525", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kei", "iso_1_code": null, "iso_3_code": "kei", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2526", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2524", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Yamdena", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Yamdena", "iso_1_code": null, "iso_3_code": "jmd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2528", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2527", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2523", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Selaru", "iso_1_code": null, "iso_3_code": "slu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2530", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seluwasan", "iso_1_code": null, "iso_3_code": "sws", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2531", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2529", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2522", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sumba-Hawu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hawu-Dhao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Hawu", "iso_1_code": null, "iso_3_code": "hvn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2534", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dhao", "iso_1_code": null, "iso_3_code": "nfa", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2535", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2533", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sumba", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Anakalangu", "iso_1_code": null, "iso_3_code": "akg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2537", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kodi", "iso_1_code": null, "iso_3_code": "kod", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2538", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lamboya", "iso_1_code": null, "iso_3_code": "lmy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2539", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Loura", "iso_1_code": null, "iso_3_code": "lur", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2540", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mamboru", "iso_1_code": null, "iso_3_code": "mvd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2541", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wejewa", "iso_1_code": null, "iso_3_code": "wew", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2542", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Wanukaka", "iso_1_code": null, "iso_3_code": "wnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2543", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kambera", "iso_1_code": null, "iso_3_code": "xbr", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2544", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2536", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2532", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teor-Kur", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kur", "iso_1_code": null, "iso_3_code": "kuv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2546", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teor", "iso_1_code": null, "iso_3_code": "tev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2547", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2545", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Timor-Babar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Nuclear Timor", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Galolen", "iso_1_code": null, "iso_3_code": "gal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2550", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Habun", "iso_1_code": null, "iso_3_code": "hbu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2551", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Helong", "iso_1_code": null, "iso_3_code": "heg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2552", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Idat\u00e9", "iso_1_code": null, "iso_3_code": "idt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2553", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kemak", "iso_1_code": null, "iso_3_code": "kem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2554", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kairui-Midiki", "iso_1_code": null, "iso_3_code": "krd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2555", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lakalei", "iso_1_code": null, "iso_3_code": "lka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2556", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makuva", "iso_1_code": null, "iso_3_code": "lva", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2557", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mambae", "iso_1_code": null, "iso_3_code": "mgm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2558", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nauete", "iso_1_code": null, "iso_3_code": "nxa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2559", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tetun", "iso_1_code": null, "iso_3_code": "tet", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2560", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tukudede", "iso_1_code": null, "iso_3_code": "tkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2561", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Welaun", "iso_1_code": null, "iso_3_code": "wlh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2562", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waima\u2019a", "iso_1_code": null, "iso_3_code": "wmh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2563", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rote", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bilba", "iso_1_code": null, "iso_3_code": "bpz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2565", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dengka", "iso_1_code": null, "iso_3_code": "dnk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2566", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lole", "iso_1_code": null, "iso_3_code": "llg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2567", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Rikou", "iso_1_code": null, "iso_3_code": "rgu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2568", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dela-Oenale", "iso_1_code": null, "iso_3_code": "row", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2569", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Termanu", "iso_1_code": null, "iso_3_code": "twu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2570", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tii", "iso_1_code": null, "iso_3_code": "txq", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2571", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2564", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uab Meto", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Amarasi", "iso_1_code": null, "iso_3_code": "aaz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2573", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Uab Meto", "iso_1_code": null, "iso_3_code": "aoz", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2574", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Baikeno", "iso_1_code": null, "iso_3_code": "bkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2575", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2572", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2549", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southwest Maluku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "East Damar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Damar, East", "iso_1_code": null, "iso_3_code": "dmr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2578", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2577", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kisar-Roma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kisar", "iso_1_code": null, "iso_3_code": "kje", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2580", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Roma", "iso_1_code": null, "iso_3_code": "rmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2581", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2579", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Luang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Luang", "iso_1_code": null, "iso_3_code": "lex", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2583", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Leti", "iso_1_code": null, "iso_3_code": "lti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2584", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2582", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teun-Nila-Serua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nila-Serua", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nila", "iso_1_code": null, "iso_3_code": "nil", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2587", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Serua", "iso_1_code": null, "iso_3_code": "srw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2588", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2586", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Te\u2019un", "iso_1_code": null, "iso_3_code": "tve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2590", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2589", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2585", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wetar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Atauran", "iso_1_code": null, "iso_3_code": "adb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2592", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aputai", "iso_1_code": null, "iso_3_code": "apx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2593", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ili\u2019uun", "iso_1_code": null, "iso_3_code": "ilu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2594", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tugun", "iso_1_code": null, "iso_3_code": "tzn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2595", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Perai", "iso_1_code": null, "iso_3_code": "wet", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2596", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2591", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2576", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2548", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kuri", "iso_1_code": null, "iso_3_code": "nbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2598", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2597", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Damar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Damar, West", "iso_1_code": null, "iso_3_code": "drn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2600", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2599", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1589", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chamorro", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chamorro", "iso_1_code": "ch", "iso_3_code": "cha", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2602", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2601", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Greater Barito", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Barito-Mahakam", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ampanang", "iso_1_code": null, "iso_3_code": "apg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2605", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tunjung", "iso_1_code": null, "iso_3_code": "tjg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2606", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2604", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central-South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dusun Deyah", "iso_1_code": null, "iso_3_code": "dun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2610", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2609", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dusun Malang", "iso_1_code": null, "iso_3_code": "duq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2612", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dusun Witu", "iso_1_code": null, "iso_3_code": "duw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2613", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ma\u2019anyan", "iso_1_code": null, "iso_3_code": "mhy", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2614", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Paku", "iso_1_code": null, "iso_3_code": "pku", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2615", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2611", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2608", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Malagasy, Bara", "iso_1_code": "mg", "iso_3_code": "bhr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2617", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Northern Betsimisaraka", "iso_1_code": "mg", "iso_3_code": "bmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2618", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bushi", "iso_1_code": null, "iso_3_code": "buc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2619", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Southern Betsimisaraka", "iso_1_code": "mg", "iso_3_code": "bzc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2620", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Masikoro", "iso_1_code": "mg", "iso_3_code": "msh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2621", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Merina", "iso_1_code": "mg", "iso_3_code": "plt", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2622", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malagasy, Sakalava", "iso_1_code": "mg", "iso_3_code": "skg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2623", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malagasy, Tandroy-Mahafaly", "iso_1_code": "mg", "iso_3_code": "tdx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2624", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malagasy, Tesaka", "iso_1_code": "mg", "iso_3_code": "tkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2625", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Tanosy", "iso_1_code": "mg", "iso_3_code": "txy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2626", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malagasy, Antankarana", "iso_1_code": "mg", "iso_3_code": "xmv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2627", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Malagasy, Tsimihety", "iso_1_code": "mg", "iso_3_code": "xmw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2628", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2616", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lawangan", "iso_1_code": null, "iso_3_code": "lbx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2630", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tawoyan", "iso_1_code": null, "iso_3_code": "twy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2631", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2629", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2607", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sama-Bajaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Yakan", "iso_1_code": null, "iso_3_code": "yka", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2633", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Abaknon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Inabaknon", "iso_1_code": null, "iso_3_code": "abx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2635", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2634", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sulu-Borneo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Borneo Coast Bajaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bajau, Indonesian", "iso_1_code": null, "iso_3_code": "bdl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2638", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bajau, West Coast", "iso_1_code": null, "iso_3_code": "bdr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2639", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mapun", "iso_1_code": null, "iso_3_code": "sjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2640", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2637", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inner Sulu Sama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sama, Central", "iso_1_code": null, "iso_3_code": "sml", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2642", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sama, Southern", "iso_1_code": null, "iso_3_code": "ssb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2643", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sama, Balangingih", "iso_1_code": null, "iso_3_code": "sse", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2644", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2641", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Sulu Sama", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sama, Pangutaran", "iso_1_code": null, "iso_3_code": "slm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2646", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2645", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2636", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2632", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kohin", "iso_1_code": null, "iso_3_code": "kkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2649", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ot Danum", "iso_1_code": null, "iso_3_code": "otd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2650", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Siang", "iso_1_code": null, "iso_3_code": "sya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2651", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2648", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bakumpai", "iso_1_code": null, "iso_3_code": "bkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2653", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngaju", "iso_1_code": null, "iso_3_code": "nij", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2654", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2652", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2647", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2603", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Greater Central Philippine", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central Philippine", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ata", "iso_1_code": null, "iso_3_code": "atm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2657", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ayta, Sorsogon", "iso_1_code": null, "iso_3_code": "ays", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2658", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Binukidnon, Northern", "iso_1_code": null, "iso_3_code": "kyn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2659", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Binukidnon, Southern", "iso_1_code": null, "iso_3_code": "mtw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2660", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sulod", "iso_1_code": null, "iso_3_code": "srg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2661", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Coastal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Naga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Agta, Katubung", "iso_1_code": null, "iso_3_code": "agk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2665", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Agta, Mt. Iraya", "iso_1_code": null, "iso_3_code": "atl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2666", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol, Central", "iso_1_code": null, "iso_3_code": "bcl", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2667", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2664", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Virac", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bikol, Southern Catanduanes", "iso_1_code": null, "iso_3_code": "bln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2669", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2668", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2663", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Inland", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Agta, Mt. Iriga", "iso_1_code": null, "iso_3_code": "agz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2671", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol, West Albay", "iso_1_code": null, "iso_3_code": "fbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2672", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol, Libon", "iso_1_code": null, "iso_3_code": "lbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2673", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol, Miraya", "iso_1_code": null, "iso_3_code": "rbl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2674", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bikol, Buhi\u2019non", "iso_1_code": null, "iso_3_code": "ubl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2675", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iriga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bikol, Rinconada", "iso_1_code": null, "iso_3_code": "bto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2677", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2676", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2670", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pandan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bikol, Northern Catanduanes", "iso_1_code": null, "iso_3_code": "cts", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2679", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2678", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2662", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bisayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Banton", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bantoanon", "iso_1_code": null, "iso_3_code": "bno", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2682", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2681", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cebuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Cebuano", "iso_1_code": null, "iso_3_code": "ceb", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2684", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2683", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bantayanon", "iso_1_code": null, "iso_3_code": "bfx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2686", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Peripheral", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ati", "iso_1_code": null, "iso_3_code": "atk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2688", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Capiznon", "iso_1_code": null, "iso_3_code": "cps", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2689", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hiligaynon", "iso_1_code": null, "iso_3_code": "hil", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2690", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Masbatenyo", "iso_1_code": null, "iso_3_code": "msb", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2691", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Porohanon", "iso_1_code": null, "iso_3_code": "prh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2692", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2687", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Romblon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Romblomanon", "iso_1_code": null, "iso_3_code": "rol", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2694", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2693", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Warayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Sorsoganon, Northern", "iso_1_code": null, "iso_3_code": "bks", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2696", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Baybayanon", "iso_1_code": null, "iso_3_code": "bvy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2697", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinabalian", "iso_1_code": null, "iso_3_code": "cbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2698", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gubat", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sorsoganon, Southern", "iso_1_code": null, "iso_3_code": "srv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2700", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2699", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samar-Waray", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Waray-Waray", "iso_1_code": null, "iso_3_code": "war", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2702", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2701", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2695", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2685", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Surigaonon", "iso_1_code": null, "iso_3_code": "sgd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2704", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tandaganon", "iso_1_code": null, "iso_3_code": "tgn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2705", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Butuan-Tausug", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Butuanon", "iso_1_code": null, "iso_3_code": "btw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2707", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tausug", "iso_1_code": null, "iso_3_code": "tsg", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2708", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2706", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2703", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Caluyanun", "iso_1_code": null, "iso_3_code": "clu", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2710", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Aklan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aklanon", "iso_1_code": null, "iso_3_code": "akl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2712", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malaynon", "iso_1_code": null, "iso_3_code": "mlz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2713", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2711", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kinarayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kinaray-a", "iso_1_code": null, "iso_3_code": "krj", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2715", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2714", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ratagnon", "iso_1_code": null, "iso_3_code": "btn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2717", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cuyonon", "iso_1_code": null, "iso_3_code": "cyo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2718", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2716", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Inonhan", "iso_1_code": null, "iso_3_code": "loc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2720", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2719", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2709", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2680", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mamanwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Minamanwa", "iso_1_code": null, "iso_3_code": "mmn", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2722", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2721", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mansakan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Davawenyo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Davawenyo", "iso_1_code": null, "iso_3_code": "daw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2725", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2724", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mandaya", "iso_1_code": null, "iso_3_code": "mry", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2727", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mansaka", "iso_1_code": null, "iso_3_code": "msk", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2728", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2726", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kamayo", "iso_1_code": null, "iso_3_code": "kyk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2730", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2729", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tagakolu", "iso_1_code": null, "iso_3_code": "klg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2732", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalagan, Kagan", "iso_1_code": null, "iso_3_code": "kll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2733", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalagan", "iso_1_code": null, "iso_3_code": "kqe", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2734", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2731", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2723", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tagalog", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Filipino", "iso_1_code": null, "iso_3_code": "fil", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2736", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": false + "scripts": [ + "Latn" + ] }, { "name": "Tagalog", "iso_1_code": "tl", "iso_3_code": "tgl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2737", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2735", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2656", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Danao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Magindanao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maguindanaon", "iso_1_code": null, "iso_3_code": "mdh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2740", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2739", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maranao-Iranon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Iranun", "iso_1_code": null, "iso_3_code": "ilm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2742", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Iranun", "iso_1_code": null, "iso_3_code": "ilp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2743", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maranao", "iso_1_code": null, "iso_3_code": "mrw", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2744", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2741", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2738", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gorontalo-Mongondow", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Gorontalic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bolango", "iso_1_code": null, "iso_3_code": "bld", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2747", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Buol", "iso_1_code": null, "iso_3_code": "blf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2748", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bintauna", "iso_1_code": null, "iso_3_code": "bne", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2749", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gorontalo", "iso_1_code": null, "iso_3_code": "gor", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2750", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kaidipang", "iso_1_code": null, "iso_3_code": "kzp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2751", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lolak", "iso_1_code": null, "iso_3_code": "llq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2752", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suwawa", "iso_1_code": null, "iso_3_code": "swu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2753", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2746", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mongondowic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mongondow", "iso_1_code": null, "iso_3_code": "mog", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2755", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ponosakan", "iso_1_code": null, "iso_3_code": "pns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2756", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2754", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2745", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manobo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manobo, Dibabawon", "iso_1_code": null, "iso_3_code": "mbd", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2760", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manobo, Rajah Kabunsuwan", "iso_1_code": null, "iso_3_code": "mqk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2761", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manobo, Agusan", "iso_1_code": null, "iso_3_code": "msm", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2762", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2759", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Ata-Tigwa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manobo, Ata", "iso_1_code": null, "iso_3_code": "atd", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2765", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manobo, Matigsalug", "iso_1_code": null, "iso_3_code": "mbt", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2766", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2764", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Obo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manobo, Obo", "iso_1_code": null, "iso_3_code": "obo", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2768", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2767", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2763", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manobo, Western Bukidnon", "iso_1_code": null, "iso_3_code": "mbb", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2770", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manobo, Ilianen", "iso_1_code": null, "iso_3_code": "mbi", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2771", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2769", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "2758", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "North", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "2758", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "North", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Binukid", "iso_1_code": null, "iso_3_code": "bkd", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2773", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kagayanen", "iso_1_code": null, "iso_3_code": "cgc", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2774", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Higaonon", "iso_1_code": null, "iso_3_code": "mba", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2775", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Manobo, Kinamiging", "iso_1_code": null, "iso_3_code": "mkx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2776", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2772", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Tagabawa", "iso_1_code": null, "iso_3_code": "bgs", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2778", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manobo, Sarangani", "iso_1_code": null, "iso_3_code": "mbs", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2779", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Manobo, Cotabato", "iso_1_code": null, "iso_3_code": "mta", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2780", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2777", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2757", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palawanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bonggi", "iso_1_code": null, "iso_3_code": "bdg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2782", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Batak", "iso_1_code": null, "iso_3_code": "bya", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2783", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palawano, Central", "iso_1_code": null, "iso_3_code": "plc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2784", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palawano, Southwest", "iso_1_code": null, "iso_3_code": "plv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2785", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palawano, Brooke\u2019s Point", "iso_1_code": null, "iso_3_code": "plw", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2786", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Molbog", "iso_1_code": null, "iso_3_code": "pwm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2787", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tagbanwa", "iso_1_code": null, "iso_3_code": "tbw", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2788", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tagbanwa, Central", "iso_1_code": null, "iso_3_code": "tgt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2789", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2781", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Mangyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Buhid-Taubuid", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Buhid", "iso_1_code": null, "iso_3_code": "bku", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2792", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bangon", "iso_1_code": null, "iso_3_code": "bnj", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2793", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tawbuid", "iso_1_code": null, "iso_3_code": "twb", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2794", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2791", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hanunoo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Hanunoo", "iso_1_code": null, "iso_3_code": "hnn", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2796", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2795", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2790", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subanon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Subanon, Western", "iso_1_code": null, "iso_3_code": "suc", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2798", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Subanen, Southern", "iso_1_code": null, "iso_3_code": "laa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2800", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subanen, Eastern", "iso_1_code": null, "iso_3_code": "sfe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2801", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subanon, Kolibugan", "iso_1_code": null, "iso_3_code": "skn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2802", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subanen, Northern", "iso_1_code": null, "iso_3_code": "stb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2803", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Subanen, Central", "iso_1_code": null, "iso_3_code": "syb", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2804", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2799", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2797", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Umiray Dumaget", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Manide", "iso_1_code": null, "iso_3_code": "abd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2806", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Agta, Umiray Dumaget", "iso_1_code": null, "iso_3_code": "due", + "children": [], "tokenizers": { "Latn": { - "full_object": "IndicNLPTokenizer(\"war\")", - "original_lang_name": "war", - "original_lang_code": "war", - "scripts": [ - "Latn" - ], - "class_name": "IndicNLPTokenizer", - "macrolanguage": false + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2807", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Inagta Alabat", "iso_1_code": null, "iso_3_code": "dul", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2808", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2805", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"tl\")", + "original_lang_name": "filipino", + "original_lang_code": "fil", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2655", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Javanese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Javanese, New Caledonian", "iso_1_code": null, "iso_3_code": "jas", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2810", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Javanese", "iso_1_code": "jv", "iso_3_code": "jav", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2811", + "native_tokenizers": [], "scripts": [ "Latn", "Java" - ], - "own_tokenizer": false + ] }, { "name": "Javanese, Suriname", "iso_1_code": null, "iso_3_code": "jvn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2812", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Osing", "iso_1_code": null, "iso_3_code": "osi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2813", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tengger", "iso_1_code": null, "iso_3_code": "tes", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2814", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2809", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalamian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Agutaynen", "iso_1_code": null, "iso_3_code": "agn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2816", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tagbanwa, Calamian", "iso_1_code": null, "iso_3_code": "tbk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2817", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2815", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lampung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lampung Nyo", "iso_1_code": null, "iso_3_code": "abl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2819", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Komering", "iso_1_code": null, "iso_3_code": "kge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2820", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lampung Api", "iso_1_code": null, "iso_3_code": "ljp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2821", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2818", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Land Dayak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Benyadu\u2019", "iso_1_code": null, "iso_3_code": "byd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2823", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sanggau", "iso_1_code": null, "iso_3_code": "scg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2824", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bakati\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bakati\u2019", "iso_1_code": null, "iso_3_code": "bei", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2826", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bakati\u2019, Rara", "iso_1_code": null, "iso_3_code": "lra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2827", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bakati\u2019, Sara", "iso_1_code": null, "iso_3_code": "sre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2828", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2825", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bidayuh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Core", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bidayuh, Biatah", "iso_1_code": null, "iso_3_code": "bth", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2832", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2831", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sembaan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bidayuh, Tringgus-Sembaan", "iso_1_code": null, "iso_3_code": "trx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2834", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2833", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bidayuh, Bau", "iso_1_code": null, "iso_3_code": "sne", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2836", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2835", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2830", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bidayuh Serian", "iso_1_code": null, "iso_3_code": "sdo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2838", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2837", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2829", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Jangkang", "iso_1_code": null, "iso_3_code": "djo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2840", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Beginci", "iso_1_code": null, "iso_3_code": "ebc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2841", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gerai", "iso_1_code": null, "iso_3_code": "gef", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2842", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ribun", "iso_1_code": null, "iso_3_code": "rir", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2843", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semandang", "iso_1_code": null, "iso_3_code": "sdq", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2844", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Mateq", "iso_1_code": null, "iso_3_code": "xem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2845", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2839", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2822", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Madurese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kangean", "iso_1_code": null, "iso_3_code": "kkv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2847", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Madura", "iso_1_code": null, "iso_3_code": "mad", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2848", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2846", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malayo-Chamic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Chamic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Acehnese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aceh", "iso_1_code": null, "iso_3_code": "ace", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2852", + "native_tokenizers": [], "scripts": [ - "Latn", - "Arab" - ], - "own_tokenizer": false + "Arab", + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2851", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Coastal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Haroi", "iso_1_code": null, "iso_3_code": "hro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2854", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cham", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cham, Western", "iso_1_code": null, "iso_3_code": "cja", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2856", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cham, Eastern", "iso_1_code": null, "iso_3_code": "cjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2857", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2855", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2853", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Highlands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bih", "iso_1_code": null, "iso_3_code": "ibh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2859", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jarai", "iso_1_code": null, "iso_3_code": "jra", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2860", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Rade", "iso_1_code": null, "iso_3_code": "rad", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2861", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Chru-Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chru", "iso_1_code": null, "iso_3_code": "cje", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2863", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Cham", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tsat", "iso_1_code": null, "iso_3_code": "huq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2865", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roglai, Southern", "iso_1_code": null, "iso_3_code": "rgs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2866", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roglai, Cacgia", "iso_1_code": null, "iso_3_code": "roc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2867", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Roglai, Northern", "iso_1_code": null, "iso_3_code": "rog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2868", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2864", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2862", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2858", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2850", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malayic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Keninjal", "iso_1_code": null, "iso_3_code": "knl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2870", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kendayan", "iso_1_code": null, "iso_3_code": "knx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2871", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Urak Lawoi\u2019", "iso_1_code": "ms", "iso_3_code": "urk", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2872", + "native_tokenizers": [], "scripts": [ "Thai" - ], - "own_tokenizer": true + ] }, { "name": "Malayic Dayak", "iso_1_code": null, "iso_3_code": "xdy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2873", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ibanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Iban", "iso_1_code": null, "iso_3_code": "iba", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2875", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Remun", "iso_1_code": null, "iso_3_code": "lkj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2876", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mualang", "iso_1_code": null, "iso_3_code": "mtd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2877", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seberuang", "iso_1_code": null, "iso_3_code": "sbx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2878", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2874", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Banjar", "iso_1_code": "ms", "iso_3_code": "bjn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2880", + "native_tokenizers": [], "scripts": [ "Latn", "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Malay, Bacanese", "iso_1_code": "ms", "iso_3_code": "btj", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2881", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Berau", "iso_1_code": "ms", "iso_3_code": "bve", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2882", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Bukit", "iso_1_code": "ms", "iso_3_code": "bvu", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2883", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Duano", "iso_1_code": "ms", "iso_3_code": "dup", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2884", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Haji", "iso_1_code": "ms", "iso_3_code": "hji", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2885", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Indonesian", "iso_1_code": "id", "iso_3_code": "ind", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"id\")", "original_lang_name": "indonesian", "original_lang_code": "ind", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2886", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] }, { "name": "Jakun", "iso_1_code": "ms", "iso_3_code": "jak", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2887", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Jambi", "iso_1_code": "ms", "iso_3_code": "jax", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2888", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Kubu", "iso_1_code": "ms", "iso_3_code": "kvb", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2889", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Kerinci", "iso_1_code": "ms", "iso_3_code": "kvr", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2890", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Brunei", "iso_1_code": "ms", "iso_3_code": "kxd", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2891", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Sekak", "iso_1_code": "ms", "iso_3_code": "lce", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2892", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Lubu", "iso_1_code": "ms", "iso_3_code": "lcf", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2893", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Col", "iso_1_code": "ms", "iso_3_code": "liw", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2894", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Kedah", "iso_1_code": "ms", "iso_3_code": "meo", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2895", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Pattani", "iso_1_code": "ms", "iso_3_code": "mfa", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2896", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Bangka", "iso_1_code": "ms", "iso_3_code": "mfb", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2897", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Indonesian, Makassar", "iso_1_code": null, "iso_3_code": "mfp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2898", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minangkabau", "iso_1_code": "ms", "iso_3_code": "min", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2899", + "native_tokenizers": [], "scripts": [ "Latn", "Arab" - ], - "own_tokenizer": true + ] }, { "name": "Malay, Kota Bangun Kutai", "iso_1_code": "ms", "iso_3_code": "mqg", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2900", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Sabah", "iso_1_code": "ms", "iso_3_code": "msi", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2901", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Musi", "iso_1_code": "ms", "iso_3_code": "mui", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2902", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Orang Kanaq", "iso_1_code": "ms", "iso_3_code": "orn", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2903", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Orang Seletar", "iso_1_code": "ms", "iso_3_code": "ors", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2904", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Pekal", "iso_1_code": "ms", "iso_3_code": "pel", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2905", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Central", "iso_1_code": "ms", "iso_3_code": "pse", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2906", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": true + ] }, { "name": "Temuan", "iso_1_code": "ms", "iso_3_code": "tmw", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2907", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Kaur", "iso_1_code": "ms", "iso_3_code": "vkk", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2908", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Tenggarong Kutai", "iso_1_code": "ms", "iso_3_code": "vkt", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2909", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay", "iso_1_code": "ms", "iso_3_code": "zlm", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2910", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Negeri Sembilan Malay", "iso_1_code": "ms", "iso_3_code": "zmi", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [], + "tokenizers": {}, "node_i": "2911", - "scripts": [], - "own_tokenizer": true + "native_tokenizers": [], + "scripts": [] }, { "name": "Malay, Standard", "iso_1_code": "ms", "iso_3_code": "zsm", - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "children": [], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2912", + "native_tokenizers": [], "scripts": [ "Latn", "Arab" - ], - "own_tokenizer": true + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2879", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2869", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2849", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minahasan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tonsawang", "iso_1_code": null, "iso_3_code": "tnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2914", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tontemboan", "iso_1_code": null, "iso_3_code": "tnt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2916", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northeast", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tondano", "iso_1_code": null, "iso_3_code": "tdn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2918", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tombulu", "iso_1_code": null, "iso_3_code": "tom", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2919", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tonsea", "iso_1_code": null, "iso_3_code": "txs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2920", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2917", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2915", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2913", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moklen", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Moklen", "iso_1_code": null, "iso_3_code": "mkm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2922", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Moken", "iso_1_code": null, "iso_3_code": "mwt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2923", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2921", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nasal", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nasal", "iso_1_code": null, "iso_3_code": "nsy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2925", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2924", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Borneo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Melanau-Kajang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kajang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bukitan", "iso_1_code": null, "iso_3_code": "bkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2929", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kajaman", "iso_1_code": null, "iso_3_code": "kag", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2930", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lahanan", "iso_1_code": null, "iso_3_code": "lhn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2931", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Batu", "iso_1_code": null, "iso_3_code": "pnm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2932", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sekapan", "iso_1_code": null, "iso_3_code": "skp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2933", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sihan", "iso_1_code": null, "iso_3_code": "spg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2934", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ukit", "iso_1_code": null, "iso_3_code": "umi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2935", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2928", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Melanau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Melanau, Daro-Matu", "iso_1_code": null, "iso_3_code": "dro", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2937", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Melanau, Kanowit-Tanjong", "iso_1_code": null, "iso_3_code": "kxn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2938", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Melanau, Central", "iso_1_code": null, "iso_3_code": "mel", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2939", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Melanau, Sibu", "iso_1_code": null, "iso_3_code": "sdx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2940", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2936", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2927", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Sarawakan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Berawan-Lower Baram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Berawan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Berawan, West", "iso_1_code": null, "iso_3_code": "zbw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2944", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Central-East Berawan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Berawan, Central", "iso_1_code": null, "iso_3_code": "zbc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2946", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Berawan, East", "iso_1_code": null, "iso_3_code": "zbe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2947", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2945", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2943", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lower Baram", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "A", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Belait", "iso_1_code": null, "iso_3_code": "beg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2951", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiput", "iso_1_code": null, "iso_3_code": "kyi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2952", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2950", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "B", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Lelak", "iso_1_code": null, "iso_3_code": "llk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2954", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Narom", "iso_1_code": null, "iso_3_code": "nrm", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2955", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tutong", "iso_1_code": null, "iso_3_code": "ttg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2956", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2953", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2949", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2948", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2942", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bintulu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Vaie", "iso_1_code": null, "iso_3_code": "bny", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2958", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2957", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dayic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kelabitic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kelabit", "iso_1_code": null, "iso_3_code": "kzi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2961", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lengilu", "iso_1_code": null, "iso_3_code": "lgi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2962", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lundayeh", "iso_1_code": null, "iso_3_code": "lnd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2963", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Putoh", "iso_1_code": null, "iso_3_code": "put", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2964", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sa\u2019ban", "iso_1_code": null, "iso_3_code": "snv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2965", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tring", "iso_1_code": null, "iso_3_code": "tgq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2966", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2960", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2959", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan-Kenyah", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kayanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayan Proper", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayan, Busang", "iso_1_code": null, "iso_3_code": "bfg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2970", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bahau", "iso_1_code": null, "iso_3_code": "bhv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2971", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan, Baram", "iso_1_code": null, "iso_3_code": "kys", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2972", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan, Rejang", "iso_1_code": null, "iso_3_code": "ree", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2973", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan, Wahau", "iso_1_code": null, "iso_3_code": "whu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2974", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan Mahakam", "iso_1_code": null, "iso_3_code": "xay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2975", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan, Mendalam", "iso_1_code": null, "iso_3_code": "xkd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2976", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayan, Kayan River", "iso_1_code": null, "iso_3_code": "xkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2977", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2969", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Modang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Modang", "iso_1_code": null, "iso_3_code": "mxd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2979", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Segai", "iso_1_code": null, "iso_3_code": "sge", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2980", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2978", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muller-Schwaner \u2018Punan\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bukat", "iso_1_code": null, "iso_3_code": "bvk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2982", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Hovongan", "iso_1_code": null, "iso_3_code": "hov", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2983", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Aoheng", "iso_1_code": null, "iso_3_code": "pni", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2984", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Aput", "iso_1_code": null, "iso_3_code": "pud", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2985", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Merah", "iso_1_code": null, "iso_3_code": "puf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2986", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kereho", "iso_1_code": null, "iso_3_code": "xke", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2987", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2981", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murik Kayan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Murik", "iso_1_code": null, "iso_3_code": "mxr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2989", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2988", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2968", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kenyah", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kenyah, Mainstream", "iso_1_code": null, "iso_3_code": "xkl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2991", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kayanic Kenyah", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sebop", "iso_1_code": null, "iso_3_code": "sib", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2993", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Long Wat", "iso_1_code": null, "iso_3_code": "ttw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2994", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kenyah, Wahau", "iso_1_code": null, "iso_3_code": "whk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "2995", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2992", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Upper Pujungan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Uma\u2019 Lung", "iso_1_code": null, "iso_3_code": "ulu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2997", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Uma\u2019 Lasan", "iso_1_code": null, "iso_3_code": "xky", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "2998", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "2996", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2990", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Penan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Penan, Eastern", "iso_1_code": null, "iso_3_code": "pez", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3000", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Penan, Western", "iso_1_code": null, "iso_3_code": "pne", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3001", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2999", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2967", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Tubu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Punan Tubu", "iso_1_code": null, "iso_3_code": "puj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3003", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3002", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2941", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rejang-Sajau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Basap", "iso_1_code": null, "iso_3_code": "bdb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3005", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Burusu", "iso_1_code": null, "iso_3_code": "bqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3006", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Bah-Biau", "iso_1_code": null, "iso_3_code": "pna", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3007", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Punan Merap", "iso_1_code": null, "iso_3_code": "puc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3008", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sajau Basap", "iso_1_code": null, "iso_3_code": "sjb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3009", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3004", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sabahan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Dusunic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bisaya-Lotud", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bisaya, Sabah", "iso_1_code": null, "iso_3_code": "bsy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3013", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lotud", "iso_1_code": null, "iso_3_code": "dtr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3014", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bisaya, Brunei", "iso_1_code": null, "iso_3_code": "bsb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3016", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3015", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3012", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Dusun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kuijau", "iso_1_code": null, "iso_3_code": "dkr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3018", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rungus", "iso_1_code": null, "iso_3_code": "drg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3019", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kota Marudu Talantang", "iso_1_code": null, "iso_3_code": "grm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3020", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kimaragang", "iso_1_code": null, "iso_3_code": "kqr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3021", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kadazan, Klias River", "iso_1_code": null, "iso_3_code": "kqt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3022", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tobilung", "iso_1_code": null, "iso_3_code": "tgb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3023", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Central", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Central", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Kadazan Dusun", "iso_1_code": null, "iso_3_code": "dtp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3025", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sugut Dusun", "iso_1_code": null, "iso_3_code": "kzs", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3026", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minokok", "iso_1_code": null, "iso_3_code": "mqq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3027", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3024", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kadazan, Labuk-Kinabatangan", "iso_1_code": null, "iso_3_code": "dtb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3029", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3028", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3017", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Dumpas", "iso_1_code": null, "iso_3_code": "dmv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3031", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3030", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3011", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ida\u2019an", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ida\u2019an", "iso_1_code": null, "iso_3_code": "dbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3033", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3032", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murutic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Murut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Okolod", "iso_1_code": null, "iso_3_code": "kqv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3036", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Keningau", "iso_1_code": null, "iso_3_code": "kxi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3037", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Tahol", "iso_1_code": null, "iso_3_code": "mvv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3038", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Paluan", "iso_1_code": null, "iso_3_code": "plz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3039", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Selungai", "iso_1_code": null, "iso_3_code": "slg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3040", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Timugon", "iso_1_code": null, "iso_3_code": "tih", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3041", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3035", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Murut, Bookan", "iso_1_code": null, "iso_3_code": "bnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3043", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3042", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Murut, Kalabakan", "iso_1_code": null, "iso_3_code": "kve", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3045", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Sembakung", "iso_1_code": null, "iso_3_code": "sbr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3046", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3044", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tidung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tidung, Southern", "iso_1_code": null, "iso_3_code": "itd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3048", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tidung, Northern", "iso_1_code": null, "iso_3_code": "ntd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3049", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Murut, Serudung", "iso_1_code": null, "iso_3_code": "srk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3050", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3047", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Papar", "iso_1_code": null, "iso_3_code": "dpp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3052", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gana", "iso_1_code": null, "iso_3_code": "gnq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3053", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3051", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3034", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paitanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Abai Sungai", "iso_1_code": null, "iso_3_code": "abf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3055", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tombonuo", "iso_1_code": null, "iso_3_code": "txa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3056", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Upper Kinabatangan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kinabatangan, Upper", "iso_1_code": null, "iso_3_code": "dmg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3058", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lobu, Tampias", "iso_1_code": null, "iso_3_code": "low", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3059", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lobu, Lanas", "iso_1_code": null, "iso_3_code": "ruu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3060", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3057", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3054", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Tatana", "iso_1_code": null, "iso_3_code": "txx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3062", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3061", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3010", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "2926", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Mangyan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Alangan", "iso_1_code": null, "iso_3_code": "alj", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3064", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Iraya", "iso_1_code": null, "iso_3_code": "iry", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3065", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tadyawan", "iso_1_code": null, "iso_3_code": "tdy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3066", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3063", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Luzon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Arta", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arta", "iso_1_code": null, "iso_3_code": "atz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3069", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3068", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ilocano", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ilocano", "iso_1_code": null, "iso_3_code": "ilo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3071", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], - "node_i": "3070", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Meso-Cordilleran", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "3070", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Meso-Cordilleran", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Alta", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Alta, Southern", "iso_1_code": null, "iso_3_code": "agy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3074", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Alta, Northern", "iso_1_code": null, "iso_3_code": "aqn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3075", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3073", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South-Central Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Central Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Isinai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Isinay", "iso_1_code": null, "iso_3_code": "inn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3079", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3078", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Central Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kalinga-Itneg", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Itneg", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Itneg, Binongan", "iso_1_code": null, "iso_3_code": "itb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3083", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itneg, Inlaud", "iso_1_code": null, "iso_3_code": "iti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3084", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itneg, Maeng", "iso_1_code": null, "iso_3_code": "itt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3085", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itneg, Moyadan", "iso_1_code": null, "iso_3_code": "ity", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3086", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Itneg, Masadiit", "iso_1_code": null, "iso_3_code": "tis", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3087", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3082", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalinga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kalinga, Vanaw", "iso_1_code": null, "iso_3_code": "bjx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3089", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalinga, Mabaka Valley", "iso_1_code": null, "iso_3_code": "kkg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3090", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalinga, Majukayang", "iso_1_code": null, "iso_3_code": "kmd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3091", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kalinga, Limos", "iso_1_code": null, "iso_3_code": "kmk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3092", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kalinga, Tanudan", "iso_1_code": null, "iso_3_code": "kml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3093", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalinga, Lubuagan", "iso_1_code": null, "iso_3_code": "knb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3094", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kalinga, Southern", "iso_1_code": null, "iso_3_code": "ksc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3095", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kalinga, Butbut", "iso_1_code": null, "iso_3_code": "kyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3096", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3088", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3081", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nuclear Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Balangaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Balangao", "iso_1_code": null, "iso_3_code": "blw", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3099", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3098", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bontok-Kankanay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bontok", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bontok, Eastern", "iso_1_code": null, "iso_3_code": "ebk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3102", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bontok, Central", "iso_1_code": null, "iso_3_code": "lbk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3103", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bontok, Southern", "iso_1_code": null, "iso_3_code": "obk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3104", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bontok, Northern", "iso_1_code": null, "iso_3_code": "rbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3105", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bontok, Southwestern", "iso_1_code": null, "iso_3_code": "vbk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3106", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3101", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kankanay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kankanaey", "iso_1_code": null, "iso_3_code": "kne", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3108", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kankanay, Northern", "iso_1_code": null, "iso_3_code": "xnn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3109", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3107", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3100", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ifugaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ifugao, Amganad", "iso_1_code": null, "iso_3_code": "ifa", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3111", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ifugao, Batad", "iso_1_code": null, "iso_3_code": "ifb", + "children": [], "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3112", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ifugao, Tuwali", "iso_1_code": null, "iso_3_code": "ifk", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3113", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ifugao, Mayoyao", "iso_1_code": null, "iso_3_code": "ifu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3114", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3110", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3097", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3080", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3077", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ilongot", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bogkalot", "iso_1_code": null, "iso_3_code": "ilk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3117", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3116", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Southern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Pangasinan", "iso_1_code": null, "iso_3_code": "pag", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3119", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Nuclear Southern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ibaloy", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ibaloi", "iso_1_code": null, "iso_3_code": "ibl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3122", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "I-wak", "iso_1_code": null, "iso_3_code": "iwk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3123", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3121", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kallahan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kallahan, Keley-i", "iso_1_code": null, "iso_3_code": "ify", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3125", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Kalanguya", "iso_1_code": null, "iso_3_code": "kak", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3126", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3124", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Karaw", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Karao", "iso_1_code": null, "iso_3_code": "kyj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3128", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3127", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3120", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3118", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3115", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3076", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3072", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern Cordilleran", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Cagayan Valley", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Ibanagic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Atta, Pudtol", "iso_1_code": null, "iso_3_code": "atp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3132", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Atta, Pamplona", "iso_1_code": null, "iso_3_code": "att", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3133", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Atta, Faire", "iso_1_code": null, "iso_3_code": "azt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3134", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ibanag", "iso_1_code": null, "iso_3_code": "ibg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3135", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Itawit", "iso_1_code": null, "iso_3_code": "itv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3136", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Yogad", "iso_1_code": null, "iso_3_code": "yog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3137", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gaddangic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Agta, Central Cagayan", "iso_1_code": null, "iso_3_code": "agt", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3139", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Gaddang", "iso_1_code": null, "iso_3_code": "gad", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3140", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ga\ua78cdang", "iso_1_code": null, "iso_3_code": "gdg", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3141", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3138", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3131", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Isnag", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Isnag", "iso_1_code": null, "iso_3_code": "isd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3143", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Adasen", "iso_1_code": null, "iso_3_code": "tiu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3144", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3142", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3130", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northeastern Luzon", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Agta, Pahanan", "iso_1_code": null, "iso_3_code": "apf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3146", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paranan", "iso_1_code": null, "iso_3_code": "prf", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3147", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Agta, Casiguran Dumagat", "iso_1_code": null, "iso_3_code": "dgc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3149", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Agta, Dupaninan", "iso_1_code": null, "iso_3_code": "duo", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3150", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Agta, Dicamay", "iso_1_code": null, "iso_3_code": "duy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3151", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kasiguranin", "iso_1_code": null, "iso_3_code": "ksn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3152", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3148", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3145", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3129", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3067", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northwest Sumatra-Barrier Islands", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Enggano", "iso_1_code": null, "iso_3_code": "eno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3154", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gayo", "iso_1_code": null, "iso_3_code": "gay", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3155", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mentawai", "iso_1_code": null, "iso_3_code": "mwv", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3156", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Simeulue", "iso_1_code": null, "iso_3_code": "smr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3157", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Batak", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Batak Dairi", "iso_1_code": null, "iso_3_code": "btd", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3160", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Batak Karo", "iso_1_code": null, "iso_3_code": "btx", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3161", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Batak Alas-Kluet", "iso_1_code": null, "iso_3_code": "btz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3162", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3159", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Simalungan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Batak Simalungun", "iso_1_code": null, "iso_3_code": "bts", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3164", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Batak Angkola", "iso_1_code": null, "iso_3_code": "akb", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3166", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Batak Toba", "iso_1_code": null, "iso_3_code": "bbc", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3167", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Batak Mandailing", "iso_1_code": null, "iso_3_code": "btm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3158", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nias", "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, + "iso_3_code": null, "children": [ { "name": "Nias", "iso_1_code": null, "iso_3_code": "nia", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3170", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Sikule", "iso_1_code": null, "iso_3_code": "skh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3153", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palauan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Palauan", "iso_1_code": null, "iso_3_code": "pau", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3173", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rejang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rejang", "iso_1_code": null, "iso_3_code": "rej", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3175", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sangiric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Sangil", "iso_1_code": null, "iso_3_code": "snl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sangir", "iso_1_code": null, "iso_3_code": "sxn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3179", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Talaud", "iso_1_code": null, "iso_3_code": "tld", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3180", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bantik", "iso_1_code": null, "iso_3_code": "bnq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3182", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ratahan", "iso_1_code": null, "iso_3_code": "rth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3176", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Sulawesi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bugis", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bugis", "iso_1_code": null, "iso_3_code": "bug", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3186", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Koneq-koneq", "iso_1_code": null, "iso_3_code": "cml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tamanic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Embaloh", "iso_1_code": null, "iso_3_code": "emb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Taman", "iso_1_code": null, "iso_3_code": "tmn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lemolang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Limola", "iso_1_code": null, "iso_3_code": "ley", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makassar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Bentong", "iso_1_code": null, "iso_3_code": "bnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konjo, Coastal", "iso_1_code": null, "iso_3_code": "kjc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Konjo, Highland", "iso_1_code": null, "iso_3_code": "kjk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3196", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Makasar", "iso_1_code": null, "iso_3_code": "mak", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3197", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Selayar", "iso_1_code": null, "iso_3_code": "sly", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3193", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Mamuju", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mamuju", "iso_1_code": null, "iso_3_code": "mqx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mandar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mandar", "iso_1_code": null, "iso_3_code": "mdr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3203", - "scripts": [], - "own_tokenizer": false - } - ], - "node_i": "3202", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Masenrempulu", - "iso_1_code": null, - "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "native_tokenizers": [], + "scripts": [] } - }, + ], + "tokenizers": {}, + "node_i": "3202", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Masenrempulu", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Malimpung", "iso_1_code": null, "iso_3_code": "mli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duri", "iso_1_code": null, "iso_3_code": "mvp", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3206", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Enrekang", "iso_1_code": null, "iso_3_code": "ptt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maiwa", "iso_1_code": null, "iso_3_code": "wmm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pitu Ulunna Salu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Aralle-Tabulahan", "iso_1_code": null, "iso_3_code": "atq", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3210", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Dakka", "iso_1_code": null, "iso_3_code": "dkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pannei", "iso_1_code": null, "iso_3_code": "pnc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bambam", "iso_1_code": null, "iso_3_code": "ptu", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3213", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Ulumanda\u2019", "iso_1_code": null, "iso_3_code": "ulm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toraja-Sa\u2019dan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Kalumpang", "iso_1_code": null, "iso_3_code": "kli", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3216", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mamasa", "iso_1_code": null, "iso_3_code": "mqj", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3217", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Tae\u2019", "iso_1_code": null, "iso_3_code": "rob", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Toraja-Sa\u2019dan", "iso_1_code": null, "iso_3_code": "sda", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3219", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Talondo\u2019", "iso_1_code": null, "iso_3_code": "tln", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3220", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seko", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Seko Tengah", "iso_1_code": null, "iso_3_code": "sko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Seko Padang", "iso_1_code": null, "iso_3_code": "skx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Panasuan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Budong-Budong", "iso_1_code": null, "iso_3_code": "bdx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Panasuan", "iso_1_code": null, "iso_3_code": "psn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3224", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3221", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sundanese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Badui", "iso_1_code": null, "iso_3_code": "bac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sunda", "iso_1_code": "su", "iso_3_code": "sun", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3229", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bulungan", "iso_1_code": null, "iso_3_code": "blj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3231", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gorap", "iso_1_code": null, "iso_3_code": "goq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3232", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1452", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Northwest Formosan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pazeh", "iso_1_code": null, "iso_3_code": "pzh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3234", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kulon", "iso_1_code": null, "iso_3_code": "uon", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Saisiyat", "iso_1_code": null, "iso_3_code": "xsy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3233", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Paiwan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Paiwan", "iso_1_code": null, "iso_3_code": "pwn", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3238", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puyuma", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Puyuma", "iso_1_code": null, "iso_3_code": "pyu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rukai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Arab": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - }, - "Thai": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true - } - }, "children": [ { "name": "Rukai", "iso_1_code": null, "iso_3_code": "dru", + "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"ms\")", - "original_lang_name": "malay", - "original_lang_code": "msa", - "scripts": [ - "Latn", - "Arab", - "Thai" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": true + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "3242", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "3241", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsouic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Saaroa", "iso_1_code": null, "iso_3_code": "sxr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tsou", "iso_1_code": null, "iso_3_code": "tsu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kanakanabu", "iso_1_code": null, "iso_3_code": "xnb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ketangalan", "iso_1_code": null, "iso_3_code": "kae", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Plains", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Central Western Plains", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Babuza", "iso_1_code": null, "iso_3_code": "bzg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Papora-Hoanya", "iso_1_code": null, "iso_3_code": "ppu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3252", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Thao", "iso_1_code": null, "iso_3_code": "ssf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "3254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "3249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"id\")", + "original_lang_name": "indonesian", + "original_lang_code": "ind", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1436", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file