# app.py -- "Create app.py" (commit 1c9197b, verified), uploaded by lingvanex; 3.46 kB.
import gradio as gr
import sentencepiece as spm
import ctranslate2
from huggingface_hub import hf_hub_download
import os
# Display name -> language code. The display names populate the UI dropdowns
# and are lower-cased to build the model repository id; the codes are used to
# build the SentencePiece model file names ("<code>.spm.model").
languages = {
    "Kurdish": "ku",
    "Samoan": "sm",
    "Xhosa": "xh",
    "Lao": "lo",
    "Corsican": "co",
    "Cebuano": "ceb",
    "Galician": "gl",
    "Yiddish": "yi",
    "Swahili": "sw",
    "Yoruba": "yo",
    "English": "en",
}
def get_repo_id(src_lang, tgt_lang):
    """Return the model repository id for a language pair.

    Both arguments are language display names (e.g. ``"English"``); they are
    lower-cased and combined as
    ``lingvanex/<source>-to-<target>-translation``.
    """
    source_slug = src_lang.lower()
    target_slug = tgt_lang.lower()
    return "lingvanex/{}-to-{}-translation".format(source_slug, target_slug)
def download_models(src_lang, tgt_lang):
    """Download every file needed to translate ``src_lang`` -> ``tgt_lang``.

    The files are fetched with ``hf_hub_download`` from the repository named
    by ``get_repo_id``. Returns a dict with the keys ``src_spm``, ``tgt_spm``,
    ``model``, ``source_vocab``, ``target_vocab`` and ``config``, each holding
    the value returned by ``hf_hub_download`` for that file.

    Raises ``KeyError`` if a language name is missing from ``languages``.
    """
    repo = get_repo_id(src_lang, tgt_lang)

    def _fetch(filename):
        # One Hub download per file, always from the same repository.
        return hf_hub_download(repo_id=repo, filename=filename)

    # Entries are evaluated top to bottom, so the download order is fixed.
    return {
        "src_spm": _fetch(f"{languages[src_lang]}.spm.model"),
        "tgt_spm": _fetch(f"{languages[tgt_lang]}.spm.model"),
        "model": _fetch("model.bin"),
        "source_vocab": _fetch("source_vocabulary.txt"),
        "target_vocab": _fetch("target_vocabulary.txt"),
        "config": _fetch("config.json"),
    }
def translate(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    Args:
        text: The text to translate.
        src_lang: Display name of the source language (a key of ``languages``).
        tgt_lang: Display name of the target language (a key of ``languages``).

    Returns:
        The translated string. ``text`` is returned unchanged when both
        languages are the same, and ``""`` is returned for empty or
        whitespace-only input.

    Raises:
        gr.Error: If a language is unknown, or if neither side of the pair is
            English (this demo only routes translations to or from English).
    """
    if src_lang == tgt_lang:
        return text

    # Nothing to translate: avoid downloading and loading a model for no work.
    if not text or not text.strip():
        return ""

    # Fail early with a readable message instead of a KeyError or a failed
    # Hub download further down.
    if src_lang not in languages or tgt_lang not in languages:
        raise gr.Error("Please select a valid source and target language.")
    if "English" not in (src_lang, tgt_lang):
        raise gr.Error(
            "Unsupported language pair: translation is only available to or from English."
        )

    models = download_models(src_lang, tgt_lang)
    spm_encoder = spm.SentencePieceProcessor(models["src_spm"])
    spm_decoder = spm.SentencePieceProcessor(models["tgt_spm"])

    # The translator is pointed at the directory that holds model.bin.
    # NOTE: the tokenizers and translator are rebuilt on every call.
    model_dir = os.path.dirname(models["model"])
    translator = ctranslate2.Translator(model_dir, device="cpu")

    tokens = spm_encoder.encode(text, out_type=str)
    result = translator.translate_batch([tokens])
    # Only the first hypothesis of the single input sentence is decoded.
    output = spm_decoder.decode(result[0].hypotheses[0], out_type=str)
    return output
def update_target_lang(src_lang):
    """Return the target-language dropdown update and note for ``src_lang``.

    Args:
        src_lang: Display name currently selected as the source language.

    Returns:
        A ``(dropdown, note)`` tuple. With English as the source, every
        language is offered and the note is empty. With any other source, the
        only choice is English, English is selected, and the note explains
        the restriction.
    """
    if src_lang == "English":
        return gr.Dropdown(choices=sorted(languages.keys())), ""

    # Select "English" explicitly: otherwise a previously chosen target that
    # is no longer among the choices would stay selected.
    return (
        gr.Dropdown(choices=["English"], value="English"),
        "Note: Translations are only supported from this language to English.",
    )
# Build the UI: a language pair selector, input/output text boxes, example
# inputs and a button that runs the translation.
with gr.Blocks() as demo:
    gr.Markdown("# Multilingual Translation with Lingvanex")
    gr.Markdown("""
This translator allows you to translate text between English and a variety of other languages. Please note that translations are supported only in the following directions:
- From English to the target language (e.g., English → Kurdish).
- From the source language to English (e.g., Kurdish → English).
For this demo, language-specific model pairs are utilized, so translations between two non-English languages are not supported at this time.
""")

    with gr.Row():
        src_lang = gr.Dropdown(
            choices=sorted(languages),
            label="Source Language",
            value="English",
        )
        tgt_lang = gr.Dropdown(
            choices=sorted(languages),
            label="Target Language",
            value="Kurdish",
        )

    # Placeholder that update_target_lang fills with a note.
    note = gr.Markdown("")

    # Changing the source language updates the target dropdown and the note.
    src_lang.change(
        update_target_lang,
        inputs=src_lang,
        outputs=[tgt_lang, note],
    )

    text_input = gr.Textbox(
        label="Input Text",
        placeholder="Enter text to translate...",
    )
    text_output = gr.Textbox(label="Translated Text")

    examples = gr.Examples(
        examples=[
            ["Hello, how are you?", "English", "Kurdish"],
            ["Silav halê we çawa ye?", "Kurdish", "English"],
        ],
        inputs=[text_input, src_lang, tgt_lang],
    )

    translate_btn = gr.Button("Translate")
    translate_btn.click(
        translate,
        inputs=[text_input, src_lang, tgt_lang],
        outputs=text_output,
    )

demo.launch(share=True)