zionia's picture
adjust layout
3363f16 verified
raw
history blame
3.43 kB
import gradio as gr
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
# Checkpoint identifier handed to from_pretrained() for both the tokenizer and the model.
model_name = "dsfsi/nso-en-m2m100-gov"
# Both objects are loaded once at import time and reused by every translate() call.
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)
# Startup diagnostic: print the tokenizer's language-code -> token mapping.
print(tokenizer.lang_code_to_token)
# Source language for tokenization.
# NOTE(review): "ns" is presumably the M2M100 code for Northern Sotho — confirm against the mapping printed above.
tokenizer.src_lang = "ns"
# Store the id of the "en" language token as the model's default forced first generated token.
model.config.forced_bos_token_id = tokenizer.get_lang_id("en")
def translate(inp):
    """Translate text to English with the module-level M2M100 tokenizer and model.

    Args:
        inp: Source text to translate. ``None`` and blank or whitespace-only
            strings are accepted.

    Returns:
        The decoded translation with special tokens removed, or an empty
        string when there is nothing to translate.
    """
    # Nothing to translate: skip the expensive generate() call, which would
    # otherwise run on the language/special tokens alone.
    if inp is None or not inp.strip():
        return ""

    # truncation=True cuts over-long input at the tokenizer's configured
    # maximum length instead of passing an unbounded sequence to the model.
    inputs = tokenizer(inp, return_tensors="pt", truncation=True)
    translated_tokens = model.generate(
        **inputs,
        max_length=512,
        # Force the English language token as the first generated token.
        forced_bos_token_id=tokenizer.get_lang_id("en"),
    )
    # generate() returns a batch; only one sequence was submitted.
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
# HTML snippet for the page header: a centered, 150px-wide logo image.
# Rendered through gr.Markdown(logo) in the Blocks layout.
logo = """
<div style='text-align: center;'>
<img src='file/logo_transparent_small.png' alt='Logo' width='150'/>
</div>
"""
# HTML snippet with the centered page heading and a one-paragraph summary of
# the space. Rendered through gr.Markdown(description).
description = """
<p style='text-align: center;'>
Northern Sotho to English Translation
</p>
<p>
This space provides a translation service from Northern Sotho to English using the M2M100 model, fine-tuned for low-resource languages. It supports researchers, linguists, and users working with Northern Sotho texts.
</p>
"""
# HTML snippet with a centered row of external links (GitHub, feedback form,
# arXiv), each opening in a new tab. Rendered through gr.Markdown(article).
article = """
<div style='text-align: center;'>
<a href='https://github.com/dsfsi/nso-en-m2m100-gov' target='_blank'>GitHub</a> |
<a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank'>Feedback Form</a> |
<a href='https://arxiv.org/abs/2303.03750' target='_blank'>Arxiv</a>
</div>
<br/>
"""
# HTML snippet with a centered sub-heading and a paragraph of background on
# the model. Rendered through gr.Markdown(extra_info) below the translation UI.
extra_info = """
<div style='text-align: center;'>
<h4>More information about the space</h4>
</div>
<p>
This is a variant of the M2M100 model, fine-tuned on a multilingual dataset to support translation from Northern Sotho (Sepedi) to English. The model was trained with a focus on improving translation accuracy for low-resource languages.
</p>
"""
# HTML snippet with the centered author credits line.
# Rendered through gr.Markdown(authors).
authors = """
<div style='text-align: center;'>
Authors: Vukosi Marivate, Matimba Shingange, Richard Lastrucci, Isheanesu Joseph Dzingirai, Jenalea Rajab
</div>
"""
# BibTeX entry for the accompanying paper, rendered through gr.Markdown(citation).
# The entry is wrapped in a fenced code block: without the fence, Markdown
# collapses the fields into a single run-on paragraph instead of showing
# copy-pastable BibTeX.
citation = """
```bibtex
@inproceedings{lastrucci-etal-2023-preparing,
title = "Preparing the Vuk{'}uzenzele and {ZA}-gov-multilingual {S}outh {A}frican multilingual corpora",
author = "Richard Lastrucci and Isheanesu Dzingirai and Jenalea Rajab and Andani Madodonga and Matimba Shingange and Daniel Njini and Vukosi Marivate",
booktitle = "Proceedings of the Fourth workshop on Resources for African Indigenous Languages (RAIL 2023)",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.rail-1.3",
pages = "18--25"
}
```
"""
# HTML snippet with a centered DOI link that opens in a new tab.
# Rendered through gr.Markdown(doi) at the bottom of the page.
doi = """
<div style='text-align: center;'>
DOI: <a href="https://doi.org/10.48550/arXiv.2303.03750" target="_blank">10.48550/arXiv.2303.03750</a>
</div>
"""
# Page layout, top to bottom: logo, description, link row, the translation
# interface, then background information, authors, citation and DOI.
with gr.Blocks() as demo:
    gr.Markdown(logo)
    gr.Markdown(description)
    gr.Markdown(article)

    # Input/output widgets handed to the Interface below.
    textbox = gr.Textbox(lines=5, placeholder="Enter Northern Sotho text (maximum 5 lines)", label="Input")
    output_text = gr.Textbox(label="Translation")

    # Wires translate() between the two text boxes.
    gr.Interface(
        fn=translate,
        inputs=textbox,
        outputs=output_text,
        title="Northern Sotho to English Translation"
    )

    gr.Markdown(extra_info)
    gr.Markdown(authors)
    gr.Markdown(citation)
    gr.Markdown(doi)

# launch(enable_queue=True) is deprecated in Gradio 3.x and rejected by later
# releases; queue() is the supported way to enable request queuing.
demo.queue()
demo.launch()