nllb_language_codes: dict[str, str] = { "Acehnese (Arabic script)": "ace_Arab", "Acehnese (Latin script)": "ace_Latn", "Mesopotamian Arabic": "acm_Arab", "Ta’izzi-Adeni Arabic": "acq_Arab", "Tunisian Arabic": "aeb_Arab", "Afrikaans": "afr_Latn", "South Levantine Arabic": "ajp_Arab", "Akan": "aka_Latn", "Amharic": "amh_Ethi", "North Levantine Arabic": "apc_Arab", "Modern Standard Arabic": "arb_Arab", "Modern Standard Arabic (Romanized)": "arb_Latn", "Najdi Arabic": "ars_Arab", "Moroccan Arabic": "ary_Arab", "Egyptian Arabic": "arz_Arab", "Assamese": "asm_Beng", "Asturian": "ast_Latn", "Awadhi": "awa_Deva", "Central Aymara": "ayr_Latn", "South Azerbaijani": "azb_Arab", "North Azerbaijani": "azj_Latn", "Bashkir": "bak_Cyrl", "Bambara": "bam_Latn", "Balinese": "ban_Latn", "Belarusian": "bel_Cyrl", "Bemba": "bem_Latn", "Bengali": "ben_Beng", "Bhojpuri": "bho_Deva", "Banjar (Arabic script)": "bjn_Arab", "Banjar (Latin script)": "bjn_Latn", "Standard Tibetan": "bod_Tibt", "Bosnian": "bos_Latn", "Buginese": "bug_Latn", "Bulgarian": "bul_Cyrl", "Catalan": "cat_Latn", "Cebuano": "ceb_Latn", "Czech": "ces_Latn", "Chokwe": "cjk_Latn", "Central Kurdish": "ckb_Arab", "Crimean Tatar": "crh_Latn", "Welsh": "cym_Latn", "Danish": "dan_Latn", "German": "deu_Latn", "Southwestern Dinka": "dik_Latn", "Dyula": "dyu_Latn", "Dzongkha": "dzo_Tibt", "Greek": "ell_Grek", "English": "eng_Latn", "Esperanto": "epo_Latn", "Estonian": "est_Latn", "Basque": "eus_Latn", "Ewe": "ewe_Latn", "Faroese": "fao_Latn", "Fijian": "fij_Latn", "Finnish": "fin_Latn", "Fon": "fon_Latn", "French": "fra_Latn", "Friulian": "fur_Latn", "Nigerian Fulfulde": "fuv_Latn", "Scottish Gaelic": "gla_Latn", "Irish": "gle_Latn", "Galician": "glg_Latn", "Guarani": "grn_Latn", "Gujarati": "guj_Gujr", "Haitian Creole": "hat_Latn", "Hausa": "hau_Latn", "Hebrew": "heb_Hebr", "Hindi": "hin_Deva", "Chhattisgarhi": "hne_Deva", "Croatian": "hrv_Latn", "Hungarian": "hun_Latn", "Armenian": "hye_Armn", "Igbo": "ibo_Latn", "Ilocano": "ilo_Latn", "Indonesian": "ind_Latn", "Icelandic": "isl_Latn", "Italian": "ita_Latn", "Javanese": "jav_Latn", "Japanese": "jpn_Jpan", "Kabyle": "kab_Latn", "Jingpho": "kac_Latn", "Kamba": "kam_Latn", "Kannada": "kan_Knda", "Kashmiri (Arabic script)": "kas_Arab", "Kashmiri (Devanagari script)": "kas_Deva", "Georgian": "kat_Geor", "Central Kanuri (Arabic script)": "knc_Arab", "Central Kanuri (Latin script)": "knc_Latn", "Kazakh": "kaz_Cyrl", "Kabiyè": "kbp_Latn", "Kabuverdianu": "kea_Latn", "Khmer": "khm_Khmr", "Kikuyu": "kik_Latn", "Kinyarwanda": "kin_Latn", "Kyrgyz": "kir_Cyrl", "Kimbundu": "kmb_Latn", "Northern Kurdish": "kmr_Latn", "Kikongo": "kon_Latn", "Korean": "kor_Hang", "Lao": "lao_Laoo", "Ligurian": "lij_Latn", "Limburgish": "lim_Latn", "Lingala": "lin_Latn", "Lithuanian": "lit_Latn", "Lombard": "lmo_Latn", "Latgalian": "ltg_Latn", "Luxembourgish": "ltz_Latn", "Luba-Kasai": "lua_Latn", "Ganda": "lug_Latn", "Luo": "luo_Latn", "Mizo": "lus_Latn", "Standard Latvian": "lvs_Latn", "Magahi": "mag_Deva", "Maithili": "mai_Deva", "Malayalam": "mal_Mlym", "Marathi": "mar_Deva", "Minangkabau (Arabic script)": "min_Arab", "Minangkabau (Latin script)": "min_Latn", "Macedonian": "mkd_Cyrl", "Plateau Malagasy": "plt_Latn", "Maltese": "mlt_Latn", "Meitei (Bengali script)": "mni_Beng", "Halh Mongolian": "khk_Cyrl", "Mossi": "mos_Latn", "Maori": "mri_Latn", "Burmese": "mya_Mymr", "Dutch": "nld_Latn", "Norwegian Nynorsk": "nno_Latn", "Norwegian Bokmål": "nob_Latn", "Nepali": "npi_Deva", "Northern Sotho": "nso_Latn", "Nuer": "nus_Latn", "Nyanja": "nya_Latn", "Occitan": "oci_Latn", "West Central Oromo": "gaz_Latn", "Odia": "ory_Orya", "Pangasinan": "pag_Latn", "Eastern Panjabi": "pan_Guru", "Papiamento": "pap_Latn", "Western Persian": "pes_Arab", "Polish": "pol_Latn", "Portuguese": "por_Latn", "Dari": "prs_Arab", "Southern Pashto": "pbt_Arab", "Ayacucho Quechua": "quy_Latn", "Romanian": "ron_Latn", "Rundi": "run_Latn", "Russian": "rus_Cyrl", "Sango": "sag_Latn", "Sanskrit": "san_Deva", "Santali": "sat_Olck", "Sicilian": "scn_Latn", "Shan": "shn_Mymr", "Sinhala": "sin_Sinh", "Slovak": "slk_Latn", "Slovenian": "slv_Latn", "Samoan": "smo_Latn", "Shona": "sna_Latn", "Sindhi": "snd_Arab", "Somali": "som_Latn", "Southern Sotho": "sot_Latn", "Spanish": "spa_Latn", "Tosk Albanian": "als_Latn", "Sardinian": "srd_Latn", "Serbian": "srp_Cyrl", "Swati": "ssw_Latn", "Sundanese": "sun_Latn", "Swedish": "swe_Latn", "Swahili": "swh_Latn", "Silesian": "szl_Latn", "Tamil": "tam_Taml", "Tatar": "tat_Cyrl", "Telugu": "tel_Telu", "Tajik": "tgk_Cyrl", "Tagalog": "tgl_Latn", "Thai": "tha_Thai", "Tigrinya": "tir_Ethi", "Tamasheq (Latin script)": "taq_Latn", "Tamasheq (Tifinagh script)": "taq_Tfng", "Tok Pisin": "tpi_Latn", "Tswana": "tsn_Latn", "Tsonga": "tso_Latn", "Turkmen": "tuk_Latn", "Tumbuka": "tum_Latn", "Turkish": "tur_Latn", "Twi": "twi_Latn", "Central Atlas Tamazight": "tzm_Tfng", "Uyghur": "uig_Arab", "Ukrainian": "ukr_Cyrl", "Umbundu": "umb_Latn", "Urdu": "urd_Arab", "Northern Uzbek": "uzn_Latn", "Venetian": "vec_Latn", "Vietnamese": "vie_Latn", "Waray": "war_Latn", "Wolof": "wol_Latn", "Xhosa": "xho_Latn", "Eastern Yiddish": "ydd_Hebr", "Yoruba": "yor_Latn", "Yue Chinese": "yue_Hant", "Chinese (Simplified)": "zho_Hans", "Chinese (Traditional)": "zho_Hant", "Standard Malay": "zsm_Latn", "Zulu": "zul_Latn", } mbart_large_languages = { 'Arabic': 'ar_AR', 'Czech': 'cs_CZ', 'German': 'de_DE', 'English': 'en_XX', 'Spanish': 'es_XX', 'Estonian': 'et_EE', 'Finnish': 'fi_FI', 'French': 'fr_XX', 'Gujarati': 'gu_IN', 'Hindi': 'hi_IN', 'Italian': 'it_IT', 'Japanese': 'ja_XX', 'Kazakh': 'kk_KZ', 'Korean': 'ko_KR', 'Lithuanian': 'lt_LT', 'Latvian': 'lv_LV', 'Burmese': 'my_MM', 'Nepali': 'ne_NP', 'Dutch': 'nl_XX', 'Romanian': 'ro_RO', 'Russian': 'ru_RU', 'Sinhala': 'si_LK', 'Turkish': 'tr_TR', 'Vietnamese': 'vi_VN', 'Chinese': 'zh_CN', 'Afrikaans': 'af_ZA', 'Azerbaijani': 'az_AZ', 'Bengali': 'bn_IN', 'Persian': 'fa_IR', 'Hebrew': 'he_IL', 'Croatian': 'hr_HR', 'Indonesian': 'id_ID', 'Georgian': 'ka_GE', 'Khmer': 'km_KH', 'Macedonian': 'mk_MK', 'Malayalam': 'ml_IN', 'Mongolian': 'mn_MN', 'Marathi': 'mr_IN', 'Polish': 'pl_PL', 'Pashto': 'ps_AF', 'Portuguese': 'pt_XX', 'Swedish': 'sv_SE', 'Swahili': 'sw_KE', 'Tamil': 'ta_IN', 'Telugu': 'te_IN', 'Thai': 'th_TH', 'Tagalog': 'tl_XX', 'Ukrainian': 'uk_UA', 'Urdu': 'ur_PK', 'Xhosa': 'xh_ZA', 'Galician': 'gl_ES', 'Slovene': 'sl_SI' } # language code system: ISO 639-1 standard, two-letter codes to represent languages iso_languages = { "Afrikaans": "af", "Albanian": "sq", "Amharic": "am", "Arabic": "ar", "Armenian": "hy", "Azerbaijani": "az", "Basque": "eu", "Belarusian": "be", "Bengali": "bn", "Bosnian": "bs", "Bulgarian": "bg", "Catalan": "ca", "Cebuano": "ceb", "Chinese (Simplified)": "zh-CN", "Chinese (Traditional)": "zh-TW", "Corsican": "co", "Croatian": "hr", "Czech": "cs", "Danish": "da", "Dutch": "nl", "English": "en", "Esperanto": "eo", "Estonian": "et", "Finnish": "fi", "French": "fr", "Galician": "gl", "Georgian": "ka", "German": "de", "Greek": "el", "Gujarati": "gu", "Haitian Creole": "ht", "Hausa": "ha", "Hawaiian": "haw", "Hebrew": "he", "Hindi": "hi", "Hungarian": "hu", "Icelandic": "is", "Igbo": "ig", "Indonesian": "id", "Irish": "ga", "Italian": "it", "Japanese": "ja", "Javanese": "jv", "Kannada": "kn", "Kazakh": "kk", "Khmer": "km", "Kinyarwanda": "rw", "Korean": "ko", "Kurdish (Kurmanji)": "ku", "Kyrgyz": "ky", "Lao": "lo", "Latin": "la", "Latvian": "lv", "Lithuanian": "lt", "Luxembourgish": "lb", "Macedonian": "mk", "Malagasy": "mg", "Malay": "ms", "Malayalam": "ml", "Maltese": "mt", "Maori": "mi", "Marathi": "mr", "Mongolian": "mn", "Myanmar (Burmese)": "my", "Nepali": "ne", "Norwegian": "no", "Nyanja (Chichewa)": "ny", "Odia (Oriya)": "or", "Pashto": "ps", "Persian": "fa", "Polish": "pl", "Portuguese": "pt", "Punjabi": "pa", "Romanian": "ro", "Russian": "ru", "Samoan": "sm", "Scots Gaelic": "gd", "Serbian": "sr", "Sesotho": "st", "Shona": "sn", "Sindhi": "sd", "Sinhala": "si", "Slovak": "sk", "Slovenian": "sl", "Somali": "so", "Spanish": "es", "Sundanese": "su", "Swahili": "sw", "Swedish": "sv", "Tagalog (Filipino)": "tl", "Tajik": "tg", "Tamil": "ta", "Tatar": "tt", "Telugu": "te", "Thai": "th", "Turkish": "tr", "Turkmen": "tk", "Ukrainian": "uk", "Urdu": "ur", "Uyghur": "ug", "Uzbek": "uz", "Vietnamese": "vi", "Welsh": "cy", "Xhosa": "xh", "Yiddish": "yi", "Yoruba": "yo", "Zulu": "zu" }