File size: 5,234 Bytes
69f6746
 
 
 
 
 
 
 
1fc7291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691539b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import torch
import gradio as gr

# Use a pipeline as a high-level helper
from transformers import pipeline

# Translation pipeline shared by the whole module; it is created once when the
# file is loaded, with bfloat16 requested as the torch dtype.
pipe = pipeline(
    task="translation",
    model="facebook/nllb-200-distilled-600M",
    torch_dtype=torch.bfloat16,
)

# Lookup table of supported target languages.
#
# Each entry pairs a human-readable "name" (shown in the UI dropdown) with the
# "code" string that is handed to the translation pipeline as `tgt_lang`.
# Codes follow a `<language>_<Script>` pattern, e.g. "eng_Latn".
# NOTE(review): the list ends at "Igbo"; presumably this is only a subset of
# the languages the model supports -- confirm against the model card.
languages = {
  "languages": [
    {"name": "Acehnese (Arabic script)", "code": "ace_Arab"},
    {"name": "Acehnese (Latin script)", "code": "ace_Latn"},
    {"name": "Mesopotamian Arabic", "code": "acm_Arab"},
    {"name": "Ta’izzi-Adeni Arabic", "code": "acq_Arab"},
    {"name": "Tunisian Arabic", "code": "aeb_Arab"},
    {"name": "Afrikaans", "code": "afr_Latn"},
    {"name": "South Levantine Arabic", "code": "ajp_Arab"},
    {"name": "Akan", "code": "aka_Latn"},
    {"name": "Amharic", "code": "amh_Ethi"},
    {"name": "North Levantine Arabic", "code": "apc_Arab"},
    {"name": "Modern Standard Arabic", "code": "arb_Arab"},
    {"name": "Modern Standard Arabic (Romanized)", "code": "arb_Latn"},
    {"name": "Najdi Arabic", "code": "ars_Arab"},
    {"name": "Moroccan Arabic", "code": "ary_Arab"},
    {"name": "Egyptian Arabic", "code": "arz_Arab"},
    {"name": "Assamese", "code": "asm_Beng"},
    {"name": "Asturian", "code": "ast_Latn"},
    {"name": "Awadhi", "code": "awa_Deva"},
    {"name": "Central Aymara", "code": "ayr_Latn"},
    {"name": "South Azerbaijani", "code": "azb_Arab"},
    {"name": "North Azerbaijani", "code": "azj_Latn"},
    {"name": "Bashkir", "code": "bak_Cyrl"},
    {"name": "Bambara", "code": "bam_Latn"},
    {"name": "Balinese", "code": "ban_Latn"},
    {"name": "Belarusian", "code": "bel_Cyrl"},
    {"name": "Bemba", "code": "bem_Latn"},
    {"name": "Bengali", "code": "ben_Beng"},
    {"name": "Bhojpuri", "code": "bho_Deva"},
    {"name": "Banjar (Arabic script)", "code": "bjn_Arab"},
    {"name": "Banjar (Latin script)", "code": "bjn_Latn"},
    {"name": "Standard Tibetan", "code": "bod_Tibt"},
    {"name": "Bosnian", "code": "bos_Latn"},
    {"name": "Buginese", "code": "bug_Latn"},
    {"name": "Bulgarian", "code": "bul_Cyrl"},
    {"name": "Catalan", "code": "cat_Latn"},
    {"name": "Cebuano", "code": "ceb_Latn"},
    {"name": "Czech", "code": "ces_Latn"},
    {"name": "Chokwe", "code": "cjk_Latn"},
    {"name": "Central Kurdish", "code": "ckb_Arab"},
    {"name": "Crimean Tatar", "code": "crh_Latn"},
    {"name": "Welsh", "code": "cym_Latn"},
    {"name": "Danish", "code": "dan_Latn"},
    {"name": "German", "code": "deu_Latn"},
    {"name": "Southwestern Dinka", "code": "dik_Latn"},
    {"name": "Dyula", "code": "dyu_Latn"},
    {"name": "Dzongkha", "code": "dzo_Tibt"},
    {"name": "Greek", "code": "ell_Grek"},
    {"name": "English", "code": "eng_Latn"},
    {"name": "Esperanto", "code": "epo_Latn"},
    {"name": "Estonian", "code": "est_Latn"},
    {"name": "Basque", "code": "eus_Latn"},
    {"name": "Ewe", "code": "ewe_Latn"},
    {"name": "Faroese", "code": "fao_Latn"},
    {"name": "Fijian", "code": "fij_Latn"},
    {"name": "Finnish", "code": "fin_Latn"},
    {"name": "Fon", "code": "fon_Latn"},
    {"name": "French", "code": "fra_Latn"},
    {"name": "Friulian", "code": "fur_Latn"},
    {"name": "Nigerian Fulfulde", "code": "fuv_Latn"},
    {"name": "Scottish Gaelic", "code": "gla_Latn"},
    {"name": "Irish", "code": "gle_Latn"},
    {"name": "Galician", "code": "glg_Latn"},
    {"name": "Guarani", "code": "grn_Latn"},
    {"name": "Gujarati", "code": "guj_Gujr"},
    {"name": "Haitian Creole", "code": "hat_Latn"},
    {"name": "Hausa", "code": "hau_Latn"},
    {"name": "Hebrew", "code": "heb_Hebr"},
    {"name": "Hindi", "code": "hin_Deva"},
    {"name": "Chhattisgarhi", "code": "hne_Deva"},
    {"name": "Croatian", "code": "hrv_Latn"},
    {"name": "Hungarian", "code": "hun_Latn"},
    {"name": "Armenian", "code": "hye_Armn"},
    {"name": "Igbo", "code": "ibo_Latn"}
  ]
}

def get_fores_code(language):
    """Return the language code registered for a human-readable name.

    The comparison is case-insensitive and ignores leading/trailing
    whitespace in ``language``.

    Args:
        language: Display name of the language, e.g. ``"French"``. Values
            that are not strings (such as ``None``) are accepted and simply
            produce no match.

    Returns:
        The ``code`` string of the first entry in ``languages['languages']``
        whose ``name`` matches, or ``None`` when nothing matches.
    """
    # Calling .lower() on None (or any non-string) would raise
    # AttributeError; treat such input as "no match" instead.
    if not isinstance(language, str):
        return None

    # Normalise the query once rather than on every loop iteration.
    wanted = language.strip().lower()
    for entry in languages['languages']:
        if entry['name'].lower() == wanted:
            return entry['code']
    return None  # No entry carries that name.


def translate_text(text, destination_language):
    """Translate English text into the selected destination language.

    Args:
        text: English source text to translate.
        destination_language: Display name of the target language; it is
            resolved to a language code with ``get_fores_code``.

    Returns:
        The ``translation_text`` field of the first pipeline result, or an
        empty string when ``text`` is empty or contains only whitespace.

    Raises:
        gr.Error: If ``destination_language`` is missing or does not resolve
            to a known language code.
    """
    # Resolve the display name to a code; a missing selection resolves to None
    # without ever reaching the lookup helper.
    dest_code = get_fores_code(destination_language) if destination_language else None

    # Kept for debugging: shows which code the display name resolved to.
    print(f"Destination language code: {dest_code}")

    # Fail with a readable message instead of handing tgt_lang=None to the
    # pipeline, which would only surface as an opaque error.
    if dest_code is None:
        raise gr.Error(
            f"Unsupported destination language: {destination_language!r}"
        )

    # Nothing to translate, so skip the model call entirely.
    if not text or not text.strip():
        return ""

    # Source language is fixed to English; the target is the resolved code.
    translation = pipe(text, src_lang='eng_Latn', tgt_lang=dest_code)
    return translation[0]['translation_text']

# Input widgets: free-form English text plus the target-language picker.
text_input = gr.TextArea(label='Insert the text in English to translate')
language_dropdown = gr.Dropdown(
    label='Select the language to translate',
    # Offer every display name from the language table, in table order.
    choices=[item['name'] for item in languages['languages']],
    value='English',
)

# Output widget that receives the string returned by translate_text.
translated_output = gr.TextArea(label='This is the translated text')

# Wire the widgets to the translation function.
demo = gr.Interface(
    fn=translate_text,
    inputs=[text_input, language_dropdown],
    outputs=[translated_output],
)

demo.launch()