Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import sentencepiece as spm
|
3 |
+
import ctranslate2
|
4 |
+
from huggingface_hub import hf_hub_download
|
5 |
+
import os
|
6 |
+
|
7 |
+
# Display name -> language code. The display names populate the UI dropdowns
# and build the Hub repository id; the codes select "<code>.spm.model" files.
languages = {
    "Kurdish": "ku",
    "Samoan": "sm",
    "Xhosa": "xh",
    "Lao": "lo",
    "Corsican": "co",
    "Cebuano": "ceb",
    "Galician": "gl",
    "Yiddish": "yi",
    "Swahili": "sw",
    "Yoruba": "yo",
    "English": "en",
}
|
20 |
+
|
21 |
+
def get_repo_id(src_lang, tgt_lang):
    """Build the Hugging Face Hub repository id for one translation direction.

    Both language names are lowercased and placed into the
    ``lingvanex/<source>-to-<target>-translation`` pattern.
    """
    source_slug = src_lang.lower()
    target_slug = tgt_lang.lower()
    return f"lingvanex/{source_slug}-to-{target_slug}-translation"
|
23 |
+
|
24 |
+
def download_models(src_lang, tgt_lang):
    """Fetch the files of one translation pair from the Hugging Face Hub.

    Args:
        src_lang: Source language display name (a key of ``languages``).
        tgt_lang: Target language display name (a key of ``languages``).

    Returns:
        A dict mapping "src_spm", "tgt_spm", "model", "source_vocab",
        "target_vocab" and "config" to the values returned by
        ``hf_hub_download`` for the corresponding file.

    Raises:
        KeyError: If a language name is not a key of ``languages``.
    """
    repo_id = get_repo_id(src_lang, tgt_lang)

    def fetch(filename):
        # Every file of a pair is requested from the same repository.
        return hf_hub_download(repo_id=repo_id, filename=filename)

    models = {}
    models["src_spm"] = fetch(f"{languages[src_lang]}.spm.model")
    models["tgt_spm"] = fetch(f"{languages[tgt_lang]}.spm.model")
    models["model"] = fetch("model.bin")
    models["source_vocab"] = fetch("source_vocabulary.txt")
    models["target_vocab"] = fetch("target_vocabulary.txt")
    models["config"] = fetch("config.json")
    return models
|
35 |
+
|
36 |
+
# Loaded (encoder, decoder, translator) triples keyed by (src_lang, tgt_lang),
# so repeated requests for the same pair do not rebuild everything.
_PIPELINE_CACHE = {}
# Upper bound on the number of cached pairs, to keep memory use bounded.
_MAX_CACHED_PAIRS = 4


def _load_pipeline(src_lang, tgt_lang):
    """Return (encoder, decoder, translator) for a pair, loading it on first use."""
    key = (src_lang, tgt_lang)
    pipeline = _PIPELINE_CACHE.get(key)
    if pipeline is None:
        models = download_models(src_lang, tgt_lang)
        spm_encoder = spm.SentencePieceProcessor(models["src_spm"])
        spm_decoder = spm.SentencePieceProcessor(models["tgt_spm"])
        # The Translator is given the directory that holds model.bin.
        model_dir = os.path.dirname(models["model"])
        translator = ctranslate2.Translator(model_dir, device="cpu")
        pipeline = (spm_encoder, spm_decoder, translator)
        if len(_PIPELINE_CACHE) >= _MAX_CACHED_PAIRS:
            # Dicts keep insertion order, so the first key is the oldest entry.
            _PIPELINE_CACHE.pop(next(iter(_PIPELINE_CACHE), None), None)
        _PIPELINE_CACHE[key] = pipeline
    return pipeline


def translate(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    Args:
        text: The text to translate.
        src_lang: Source language display name (a key of ``languages``).
        tgt_lang: Target language display name (a key of ``languages``).

    Returns:
        ``text`` unchanged when both languages are the same, an empty string
        for empty or whitespace-only input, otherwise the decoded first
        hypothesis produced by the translator.

    Raises:
        gr.Error: If neither language is English; the demo only offers
            pairs that have English on one side.
    """
    if src_lang == tgt_lang:
        return text

    # Nothing to translate: skip downloading and loading a model.
    if not text or not text.strip():
        return ""

    if "English" not in (src_lang, tgt_lang):
        raise gr.Error(
            "Translations between two non-English languages are not supported."
        )

    spm_encoder, spm_decoder, translator = _load_pipeline(src_lang, tgt_lang)

    tokens = spm_encoder.encode(text, out_type=str)
    result = translator.translate_batch([tokens])
    return spm_decoder.decode(result[0].hypotheses[0], out_type=str)
|
56 |
+
|
57 |
+
|
58 |
+
def update_target_lang(src_lang):
    """Restrict the target-language dropdown according to the chosen source.

    Args:
        src_lang: The currently selected source language display name.

    Returns:
        A ``(dropdown_update, note_text)`` tuple. With an English source every
        language is offered and the note is empty. With any other source only
        English is offered, it is also made the selected value, and the note
        explains the restriction.
    """
    if src_lang == "English":
        return gr.Dropdown(choices=sorted(languages.keys())), ""

    # Set the value as well as the choices so a previously selected target
    # that is no longer offered does not stay selected.
    return (
        gr.Dropdown(choices=["English"], value="English"),
        "Note: Translations are only supported from this language to English.",
    )
|
63 |
+
|
64 |
+
|
65 |
+
# Build the demo UI: a heading and usage notes, the two language dropdowns,
# input/output text boxes, clickable examples and the Translate button.
with gr.Blocks() as demo:
    gr.Markdown("# Multilingual Translation with Lingvanex")
    gr.Markdown("""
    This translator allows you to translate text between English and a variety of other languages. Please note that translations are supported only in the following directions:
    - From English to the target language (e.g., English → Kurdish).
    - From the source language to English (e.g., Kurdish → English).
    For this demo, language-specific model pairs are utilized, so translations between two non-English languages are not supported at this time.
    """)

    # NOTE(review): the original indentation was lost; `note` is assumed to
    # sit inside the Row together with the two dropdowns -- confirm.
    with gr.Row():
        src_lang = gr.Dropdown(
            choices=sorted(languages.keys()),
            label="Source Language",
            value="English",
        )
        tgt_lang = gr.Dropdown(
            choices=sorted(languages.keys()),
            label="Target Language",
            value="Kurdish",
        )
        note = gr.Markdown("")

    # Changing the source language updates the target dropdown and the note.
    src_lang.change(
        update_target_lang,
        inputs=src_lang,
        outputs=[tgt_lang, note],
    )

    text_input = gr.Textbox(
        label="Input Text",
        placeholder="Enter text to translate...",
    )
    text_output = gr.Textbox(label="Translated Text")

    # Each example fills the text box and both language dropdowns.
    examples = gr.Examples(
        examples=[
            ["Hello, how are you?", "English", "Kurdish"],
            ["Silav halê we çawa ye?", "Kurdish", "English"],
        ],
        inputs=[text_input, src_lang, tgt_lang],
    )

    translate_btn = gr.Button("Translate")
    translate_btn.click(
        translate,
        inputs=[text_input, src_lang, tgt_lang],
        outputs=text_output,
    )

demo.launch(share=True)
|