Spaces:
Sleeping
Sleeping
File size: 3,129 Bytes
3d4f13a b5f7961 3d4f13a 4f63972 ef9b88b 3b68341 ef9b88b 965bd13 b02baad 3b68341 4f70f9f b5f7961 3d4f13a e7da02f b02baad e7da02f b02baad 1335053 b02baad 1335053 b02baad 1335053 4f70f9f b02baad 4f70f9f b02baad 1335053 4f63972 1335053 b02baad 4f70f9f b02baad 1335053 b02baad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import gradio as gr
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
# Checkpoint identifier; the tokenizer and the model weights are both loaded
# from this same name.
model_name = "dsfsi/nso-en-m2m100-gov"
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)

# Startup diagnostic: print the tokenizer's language-code -> token mapping.
print(tokenizer.lang_code_to_token)

# Inputs are tagged with the "ns" source-language code, and generation is
# configured to start with the language id the tokenizer reports for "en".
tokenizer.src_lang = "ns"
_english_lang_id = tokenizer.get_lang_id("en")
model.config.forced_bos_token_id = _english_lang_id
def translate(inp: str) -> str:
    """Translate text with the module-level M2M100 tokenizer and model.

    Args:
        inp: Text to translate. The tokenizer's source language is set at
            module level.

    Returns:
        The decoded translation with special tokens stripped, or an empty
        string when ``inp`` is empty, whitespace-only or ``None``.
    """
    # Nothing to translate: skip the generation pass, which would otherwise
    # produce arbitrary output for an empty text box.
    if not inp or not inp.strip():
        return ""

    inputs = tokenizer(inp, return_tensors="pt")
    # Pass the target-language id explicitly so the output language does not
    # depend on the model config having been mutated beforehand.
    translated_tokens = model.generate(
        **inputs,
        max_length=512,
        forced_bos_token_id=tokenizer.get_lang_id("en"),
    )
    # generate() returns a batch; a single input string yields one sequence.
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
# --- Static HTML/Markdown fragments shown on the page -----------------------

description = """
<p style='text-align: center;'>
Northern Sotho to English Translation
</p>
<p>
This space provides a translation service from Northern Sotho to English using the M2M100 model, fine-tuned for low-resource languages. It supports researchers, linguists, and users working with Northern Sotho texts.
</p>
"""
article = """
<div style='text-align: center;'>
<a href='https://github.com/dsfsi/nso-en-m2m100-gov' target='_blank'>GitHub</a> |
<a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank'>Feedback Form</a> |
<a href='https://arxiv.org/abs/2303.03750' target='_blank'>Arxiv</a>
</div>
<br/>
<p style='text-align: center;'>
<h2>Translate | Northern Sotho to English (dsfsi/nso-en-m2m100-gov)</h2>
</p>
"""
extra_info = """
<div style='text-align: center;'>
<h4>More information about the space</h4>
</div>
<p>
This is a variant of the M2M100 model, fine-tuned on a multilingual dataset to support translation from Northern Sotho (Sepedi) to English. The model was trained with a focus on improving translation accuracy for low-resource languages.
</p>
"""
authors = """
<div style='text-align: center;'>
Authors: Vukosi Marivate, Matimba Shingange, Richard Lastrucci, Isheanesu Joseph Dzingirai, Jenalea Rajab
</div>
"""
# Single braces: this is a plain string (no str.format / f-string), so
# doubled braces would be displayed literally.
citation = """
<div style='text-align: center;'>
<p>
@inproceedings{dsfsi2024, title={Northern Sotho to English Translation using M2M100},
author={DSFSI Research Team}, year={2024},
url={https://huggingface.co/dsfsi/nso-en-m2m100-gov}
}
</p>
</div>
"""
doi = """
<div style='text-align: center;'>
DOI: <a href="https://doi.org/10.1234/dsfsi.2024.001" target="_blank">10.1234/dsfsi.2024.001</a>
</div>
"""

# --- Page layout -------------------------------------------------------------
# Every component must be created inside the Blocks context to be rendered,
# so the logo, the translator and the footer are all built here.
with gr.Blocks(title="Northern Sotho to English Translation") as iface:
    # Centre the logo by placing it in the middle of three columns.
    with gr.Row():
        with gr.Column():
            pass
        with gr.Column():
            gr.Image(
                "logo_transparent_small.png",
                elem_id="logo",
                show_label=False,
                interactive=False,
            )
        with gr.Column():
            pass

    gr.Markdown(
        "<h1 style='text-align: center;'>Northern Sotho to English Translation</h1>"
    )
    gr.Markdown(description)

    # Translator: input box, output box and a button wired to translate().
    with gr.Row():
        source_box = gr.Textbox(
            lines=5,
            placeholder="Enter Northern Sotho text (maximum 5 lines)",
            label="Input",
        )
        result_box = gr.Textbox(label="Output", interactive=False)
    translate_button = gr.Button("Translate")
    translate_button.click(fn=translate, inputs=source_box, outputs=result_box)

    # Footer sections, in the order they were declared.
    gr.Markdown(article)
    gr.Markdown(extra_info)
    gr.Markdown(authors)
    gr.Markdown(citation)
    gr.Markdown(doi)

# Queueing is enabled through queue(); launch() is called only after the
# whole layout has been defined.
iface.queue()
iface.launch()
|