"""Gradio demo that translates between English and a set of other languages.

Every translation direction is served by its own Lingvanex model repository
on the Hugging Face Hub and executed with CTranslate2, so only pairs that
include English are supported.
"""

import os
from functools import lru_cache

import ctranslate2
import gradio as gr
import sentencepiece as spm
from huggingface_hub import hf_hub_download

# Display name -> language code used in the SentencePiece model file names.
languages = {
    "Kurdish": "ku",
    "Samoan": "sm",
    "Xhosa": "xh",
    "Lao": "lo",
    "Corsican": "co",
    "Cebuano": "ceb",
    "Galician": "gl",
    "Yiddish": "yi",
    "Swahili": "sw",
    "Yoruba": "yo",
    "English": "en",
}

_ENGLISH = "English"

# Files fetched from each model repository in addition to the two
# SentencePiece models, keyed by the name used in the returned mapping.
_SHARED_MODEL_FILES = {
    "model": "model.bin",
    "source_vocab": "source_vocabulary.txt",
    "target_vocab": "target_vocabulary.txt",
    "config": "config.json",
}

# Kept at column 0 so the Markdown list structure does not depend on any
# dedenting performed by the UI layer.
_INTRO_MARKDOWN = """
This translator allows you to translate text between English and a variety of other languages.
Please note that translations are supported only in the following directions:

- From English to the target language (e.g., English → Kurdish).
- From the source language to English (e.g., Kurdish → English).

For this demo, language-specific model pairs are utilized, so translations between two non-English languages are not supported at this time.
"""


def get_repo_id(src_lang: str, tgt_lang: str) -> str:
    """Return the Hub repository id for the ``src_lang`` -> ``tgt_lang`` model.

    Both arguments are display names (keys of ``languages``); they are
    lower-cased to build the repository name.
    """
    return f"lingvanex/{src_lang.lower()}-to-{tgt_lang.lower()}-translation"


def download_models(src_lang: str, tgt_lang: str) -> dict:
    """Download every file needed for one translation direction.

    Args:
        src_lang: Display name of the source language (key of ``languages``).
        tgt_lang: Display name of the target language (key of ``languages``).

    Returns:
        Mapping with the keys ``src_spm``, ``tgt_spm``, ``model``,
        ``source_vocab``, ``target_vocab`` and ``config``; each value is
        whatever ``hf_hub_download`` returns for that file.

    Raises:
        KeyError: If either language name is not in ``languages``.
    """
    repo_id = get_repo_id(src_lang, tgt_lang)
    models = {
        "src_spm": hf_hub_download(
            repo_id=repo_id, filename=f"{languages[src_lang]}.spm.model"
        ),
        "tgt_spm": hf_hub_download(
            repo_id=repo_id, filename=f"{languages[tgt_lang]}.spm.model"
        ),
    }
    for key, filename in _SHARED_MODEL_FILES.items():
        models[key] = hf_hub_download(repo_id=repo_id, filename=filename)
    return models


@lru_cache(maxsize=4)
def _load_pipeline(src_lang: str, tgt_lang: str) -> tuple:
    """Build (encoder, translator, decoder) for one direction, cached.

    Re-creating the translator on every request is the expensive part of a
    translation, so recently used directions are kept in memory. The cache
    is bounded to limit how many models stay loaded at once.
    """
    models = download_models(src_lang, tgt_lang)
    spm_encoder = spm.SentencePieceProcessor(models["src_spm"])
    spm_decoder = spm.SentencePieceProcessor(models["tgt_spm"])
    # The translator is given the directory that contains model.bin.
    model_dir = os.path.dirname(models["model"])
    translator = ctranslate2.Translator(model_dir, device="cpu")
    return spm_encoder, translator, spm_decoder


def translate(text: str, src_lang: str, tgt_lang: str) -> str:
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    Args:
        text: Text to translate.
        src_lang: Display name of the source language.
        tgt_lang: Display name of the target language.

    Returns:
        The translated text. ``text`` is returned unchanged when both
        languages are the same, and an empty string is returned for empty
        or whitespace-only input.

    Raises:
        gr.Error: If neither language is English, because no direct model
            exists for such a pair.
    """
    if src_lang == tgt_lang:
        return text
    # Nothing to translate: avoid downloading and loading a model for it.
    if not text or not text.strip():
        return ""
    if _ENGLISH not in (src_lang, tgt_lang):
        raise gr.Error(
            "Translations between two non-English languages are not supported. "
            "Please choose English as the source or the target language."
        )

    spm_encoder, translator, spm_decoder = _load_pipeline(src_lang, tgt_lang)
    tokens = spm_encoder.encode(text, out_type=str)
    result = translator.translate_batch([tokens])
    return spm_decoder.decode(result[0].hypotheses[0], out_type=str)


def update_target_lang(src_lang: str):
    """Restrict the target dropdown according to the chosen source language.

    Returns:
        A ``(dropdown_update, note_text)`` pair. With English as source all
        languages are offered and the note is empty; otherwise English is the
        only choice and the note explains the restriction.
    """
    if src_lang == _ENGLISH:
        return gr.Dropdown(choices=sorted(languages)), ""
    # Also reset the value: narrowing the choices alone would leave a stale,
    # now-invalid target selected and passed on to translate().
    return (
        gr.Dropdown(choices=[_ENGLISH], value=_ENGLISH),
        "Note: Translations are only supported from this language to English.",
    )


with gr.Blocks() as demo:
    gr.Markdown("# Multilingual Translation with Lingvanex")
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        src_lang = gr.Dropdown(
            choices=sorted(languages), label="Source Language", value="English"
        )
        tgt_lang = gr.Dropdown(
            choices=sorted(languages), label="Target Language", value="Kurdish"
        )

    note = gr.Markdown("")
    src_lang.change(update_target_lang, inputs=src_lang, outputs=[tgt_lang, note])

    text_input = gr.Textbox(label="Input Text", placeholder="Enter text to translate...")
    text_output = gr.Textbox(label="Translated Text")
    translate_btn = gr.Button("Translate")
    translate_btn.click(
        translate, inputs=[text_input, src_lang, tgt_lang], outputs=text_output
    )

    examples = gr.Examples(
        examples=[
            ["Hello, how are you?", "English", "Kurdish"],
            ["Silav halê we çawa ye?", "Kurdish", "English"],
        ],
        inputs=[text_input, src_lang, tgt_lang],
    )

demo.launch(share=True)