lingvanex committed on
Commit
1c9197b
·
verified ·
1 Parent(s): 8c3c42f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sentencepiece as spm
3
+ import ctranslate2
4
+ from huggingface_hub import hf_hub_download
5
+ import os
6
+
7
# Display name -> language code. The code is used to build the SentencePiece
# model filename ("<code>.spm.model"); the display name (lower-cased) is used
# to build the Hub repository id.
languages = {
    "Kurdish": "ku",
    "Samoan": "sm",
    "Xhosa": "xh",
    "Lao": "lo",
    "Corsican": "co",
    "Cebuano": "ceb",
    "Galician": "gl",
    "Yiddish": "yi",
    "Swahili": "sw",
    "Yoruba": "yo",
    "English": "en",
}
20
+
21
def get_repo_id(src_lang, tgt_lang):
    """Return the Hub repository id for one translation direction.

    Both language display names are lower-cased and inserted into the
    pattern ``lingvanex/<source>-to-<target>-translation``.
    """
    source_slug = src_lang.lower()
    target_slug = tgt_lang.lower()
    return "lingvanex/{}-to-{}-translation".format(source_slug, target_slug)
23
+
24
def download_models(src_lang, tgt_lang):
    """Download every file needed for one translation direction.

    Args:
        src_lang: Source language display name (a key of ``languages``).
        tgt_lang: Target language display name (a key of ``languages``).

    Returns:
        A dict whose keys are ``src_spm``, ``tgt_spm``, ``model``,
        ``source_vocab``, ``target_vocab`` and ``config``; each value is
        whatever ``hf_hub_download`` returns for that file.
    """
    repo = get_repo_id(src_lang, tgt_lang)

    # Logical name -> filename inside the repository. Dict order is the
    # download order.
    wanted_files = {
        "src_spm": f"{languages[src_lang]}.spm.model",
        "tgt_spm": f"{languages[tgt_lang]}.spm.model",
        "model": "model.bin",
        "source_vocab": "source_vocabulary.txt",
        "target_vocab": "target_vocabulary.txt",
        "config": "config.json",
    }

    return {
        key: hf_hub_download(repo_id=repo, filename=filename)
        for key, filename in wanted_files.items()
    }
35
+
36
# Loaded (encoder, decoder, translator) triples keyed by (src_lang, tgt_lang),
# so repeated requests do not re-download files or reload models.
_PIPELINE_CACHE = {}

# Upper bound on simultaneously cached language pairs, to keep memory bounded.
_MAX_CACHED_PIPELINES = 4


def _load_pipeline(src_lang, tgt_lang):
    """Return the (encoder, decoder, translator) triple for a language pair.

    The triple is built on first use and then served from
    ``_PIPELINE_CACHE``; the oldest entry is evicted once the cache is full.
    """
    key = (src_lang, tgt_lang)
    pipeline = _PIPELINE_CACHE.get(key)
    if pipeline is not None:
        return pipeline

    models = download_models(src_lang, tgt_lang)
    spm_encoder = spm.SentencePieceProcessor(models["src_spm"])
    spm_decoder = spm.SentencePieceProcessor(models["tgt_spm"])

    # The translator is given the directory that contains model.bin.
    model_dir = os.path.dirname(models["model"])
    translator = ctranslate2.Translator(model_dir, device="cpu")

    if len(_PIPELINE_CACHE) >= _MAX_CACHED_PIPELINES:
        # Dicts preserve insertion order, so the first key is the oldest.
        _PIPELINE_CACHE.pop(next(iter(_PIPELINE_CACHE)))

    pipeline = (spm_encoder, spm_decoder, translator)
    _PIPELINE_CACHE[key] = pipeline
    return pipeline


def translate(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    Args:
        text: Text to translate.
        src_lang: Source language display name.
        tgt_lang: Target language display name.

    Returns:
        The translated string. ``text`` is returned unchanged when both
        languages are the same, and ``""`` is returned for empty or
        whitespace-only input.

    Raises:
        gr.Error: If neither language is English; this demo only offers
            English <-> X model pairs.
    """
    if src_lang == tgt_lang:
        return text

    # Nothing to translate: skip the (expensive) model download and load.
    if not text or not text.strip():
        return ""

    if "English" not in (src_lang, tgt_lang):
        raise gr.Error(
            "Translations between two non-English languages are not supported."
        )

    spm_encoder, spm_decoder, translator = _load_pipeline(src_lang, tgt_lang)

    tokens = spm_encoder.encode(text, out_type=str)
    result = translator.translate_batch([tokens])
    # Take the first hypothesis for the single input sentence.
    return spm_decoder.decode(result[0].hypotheses[0], out_type=str)
56
+
57
+
58
def update_target_lang(src_lang):
    """Restrict the target-language dropdown to match the chosen source.

    Args:
        src_lang: Currently selected source language display name.

    Returns:
        A ``(dropdown_update, note_text)`` pair. With an English source every
        language is offered and the note is empty. With any other source the
        only choice is English, the selection is reset to English, and the
        note explains the restriction.
    """
    if src_lang == "English":
        return gr.Dropdown(choices=sorted(languages)), ""

    # Also reset the value: otherwise a previously selected target (for
    # example "Kurdish") stays selected although it is no longer a choice.
    return (
        gr.Dropdown(choices=["English"], value="English"),
        "Note: Translations are only supported from this language to English.",
    )
63
+
64
+
65
# Build the Gradio UI: an intro, source/target language pickers, input and
# output text boxes, two examples and a "Translate" button.
# NOTE(review): nesting was reconstructed from statement order; confirm that
# `note` belongs below the Row rather than inside it.
with gr.Blocks() as demo:
    gr.Markdown("# Multilingual Translation with Lingvanex")
    gr.Markdown("""
    This translator allows you to translate text between English and a variety of other languages. Please note that translations are supported only in the following directions:
    - From English to the target language (e.g., English → Kurdish).
    - From the source language to English (e.g., Kurdish → English).
    For this demo, language-specific model pairs are utilized, so translations between two non-English languages are not supported at this time.
    """)

    with gr.Row():
        src_lang = gr.Dropdown(
            choices=sorted(languages),
            label="Source Language",
            value="English",
        )
        tgt_lang = gr.Dropdown(
            choices=sorted(languages),
            label="Target Language",
            value="Kurdish",
        )
    note = gr.Markdown("")

    # Changing the source language updates the target choices and the note.
    src_lang.change(
        update_target_lang,
        inputs=src_lang,
        outputs=[tgt_lang, note],
    )

    text_input = gr.Textbox(
        label="Input Text",
        placeholder="Enter text to translate...",
    )
    text_output = gr.Textbox(label="Translated Text")

    examples = gr.Examples(
        examples=[
            ["Hello, how are you?", "English", "Kurdish"],
            ["Silav halê we çawa ye?", "Kurdish", "English"],
        ],
        inputs=[text_input, src_lang, tgt_lang],
    )

    translate_btn = gr.Button("Translate")
    translate_btn.click(
        translate,
        inputs=[text_input, src_lang, tgt_lang],
        outputs=text_output,
    )

demo.launch(share=True)