# Hugging Face Spaces - status: Running
import os
from functools import lru_cache

import ctranslate2
import gradio as gr
import sentencepiece as spm
from huggingface_hub import hf_hub_download
# Display name -> language code. The display names are shown in the UI
# dropdowns and lower-cased to build the Hugging Face repo id; the codes are
# used to build the SentencePiece model filenames ("<code>.spm.model").
languages = {
    "Kurdish": "ku",
    "Samoan": "sm",
    "Xhosa": "xh",
    "Lao": "lo",
    "Corsican": "co",
    "Cebuano": "ceb",
    "Galician": "gl",
    "Yiddish": "yi",
    "Swahili": "sw",
    "Yoruba": "yo",
    "English": "en",
}
def get_repo_id(src_lang, tgt_lang):
    """Return the Hugging Face repo id for a translation direction.

    Args:
        src_lang: Display name of the source language (e.g. "English").
        tgt_lang: Display name of the target language (e.g. "Kurdish").

    Returns:
        A repo id of the form "lingvanex/<src>-to-<tgt>-translation", with
        both language names lower-cased.
    """
    return f"lingvanex/{src_lang.lower()}-to-{tgt_lang.lower()}-translation"
def download_models(src_lang, tgt_lang):
    """Fetch every file needed to translate from src_lang to tgt_lang.

    Args:
        src_lang: Display name of the source language (a key of `languages`).
        tgt_lang: Display name of the target language (a key of `languages`).

    Returns:
        A dict mapping "src_spm", "tgt_spm", "model", "source_vocab",
        "target_vocab" and "config" to the values returned by
        `hf_hub_download` for the corresponding repo files.

    Raises:
        KeyError: If either language name is not a key of `languages`.
    """
    repo_id = get_repo_id(src_lang, tgt_lang)
    # Resolve all filenames first so an unknown language fails before any
    # network request is made.
    filenames = {
        "src_spm": f"{languages[src_lang]}.spm.model",
        "tgt_spm": f"{languages[tgt_lang]}.spm.model",
        "model": "model.bin",
        "source_vocab": "source_vocabulary.txt",
        "target_vocab": "target_vocabulary.txt",
        "config": "config.json",
    }
    return {
        key: hf_hub_download(repo_id=repo_id, filename=filename)
        for key, filename in filenames.items()
    }
@lru_cache(maxsize=4)
def _load_pipeline(src_lang, tgt_lang):
    """Load the tokenizers and translator for one translation direction.

    Results are cached so repeated requests for the same direction do not
    re-download and re-load the model; the cache is bounded to cap memory.

    Returns:
        A tuple (source SentencePiece processor, target SentencePiece
        processor, CTranslate2 translator).
    """
    models = download_models(src_lang, tgt_lang)
    spm_encoder = spm.SentencePieceProcessor(models["src_spm"])
    spm_decoder = spm.SentencePieceProcessor(models["tgt_spm"])
    # The translator is given the directory that contains model.bin.
    model_dir = os.path.dirname(models["model"])
    translator = ctranslate2.Translator(model_dir, device="cpu")
    return spm_encoder, spm_decoder, translator


def translate(text, src_lang, tgt_lang):
    """Translate text from src_lang to tgt_lang.

    Args:
        text: The text to translate.
        src_lang: Display name of the source language.
        tgt_lang: Display name of the target language.

    Returns:
        The translated text. The input is returned unchanged when source and
        target are the same language, and "" is returned for empty or
        whitespace-only input.

    Raises:
        gr.Error: If neither language is English (no such model pair is
            offered by this demo).
    """
    if src_lang == tgt_lang:
        return text
    # Nothing to translate: skip the model download/load entirely.
    if not text or not text.strip():
        return ""
    # Only English <-> X model pairs are used by this demo; fail with a
    # readable message instead of an opaque repository-lookup error.
    if "English" not in (src_lang, tgt_lang):
        raise gr.Error(
            "Translations are only supported to or from English."
        )

    spm_encoder, spm_decoder, translator = _load_pipeline(src_lang, tgt_lang)
    tokens = spm_encoder.encode(text, out_type=str)
    result = translator.translate_batch([tokens])
    # Take the first hypothesis of the single input sentence.
    return spm_decoder.decode(result[0].hypotheses[0], out_type=str)
def update_target_lang(src_lang):
    """Return the target-language dropdown update and note for a new source.

    Args:
        src_lang: Display name of the newly selected source language.

    Returns:
        A tuple (dropdown update, note text). With English as the source,
        every language is offered and the note is empty. Otherwise the only
        choice is English and the note explains the restriction.
    """
    if src_lang == "English":
        return gr.Dropdown(choices=sorted(languages)), ""
    # Also reset the selected value: narrowing the choices alone would leave a
    # previously selected non-English target in place.
    return (
        gr.Dropdown(choices=["English"], value="English"),
        "Note: Translations are only supported from this language to English.",
    )
# Build the Gradio UI: two language dropdowns, an input and an output textbox,
# a single "Translate" button and two clickable examples.
with gr.Blocks() as demo:
    gr.Markdown("# Multilingual Translation with Lingvanex")
    gr.Markdown("""
This translator allows you to translate text between English and a variety of other languages. Please note that translations are supported only in the following directions:
- From English to the target language (e.g., English → Kurdish).
- From the source language to English (e.g., Kurdish → English).
For this demo, language-specific model pairs are utilized, so translations between two non-English languages are not supported at this time.
""")
    with gr.Row():
        src_lang = gr.Dropdown(choices=sorted(languages.keys()), label="Source Language", value="English")
        tgt_lang = gr.Dropdown(choices=sorted(languages.keys()), label="Target Language", value="Kurdish")
    note = gr.Markdown("")
    # Refresh the target dropdown and the note whenever the source changes.
    src_lang.change(update_target_lang, inputs=src_lang, outputs=[tgt_lang, note])
    text_input = gr.Textbox(label="Input Text", placeholder="Enter text to translate...")
    text_output = gr.Textbox(label="Translated Text")
    # One button only: the original created and wired an identical second
    # "Translate" button after the examples.
    translate_btn = gr.Button("Translate")
    translate_btn.click(translate, inputs=[text_input, src_lang, tgt_lang], outputs=text_output)
    examples = gr.Examples(
        examples=[
            ["Hello, how are you?", "English", "Kurdish"],
            ["Silav halê we çawa ye?", "Kurdish", "English"],
        ],
        inputs=[text_input, src_lang, tgt_lang],
    )

demo.launch(share=True)