nllb / app.py
davanstrien's picture
davanstrien HF Staff
Update instructions in translation demo
0badc10
raw
history blame
3.17 kB
import spaces
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from flores import code_mapping
import platform
# Pick the device and checkpoint from the host platform: macOS ("Darwin")
# gets the CPU and the distilled 600M checkpoint, everything else gets CUDA
# and the 3.3B checkpoint.
if platform.system() == "Darwin":
    device = "cpu"
    MODEL_NAME = "facebook/nllb-200-distilled-600M"
else:
    device = "cuda"
    MODEL_NAME = "facebook/nllb-200-3.3B"

# Rebuild the imported mapping ordered by its values (the codes that are later
# handed to the pipeline as src_lang / tgt_lang).
code_mapping = {
    name: code
    for name, code in sorted(code_mapping.items(), key=lambda pair: pair[1])
}

# The mapping's keys, in that same order; used as the dropdown choices.
flores_codes = list(code_mapping)
def load_model():
    """Load the seq2seq model and tokenizer named by ``MODEL_NAME``.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been moved to ``device``.
    """
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    seq2seq_model = seq2seq_model.to(device)
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return seq2seq_model, loaded_tokenizer


# Loaded once at import time; _translate reads these module-level objects.
model, tokenizer = load_model()
@spaces.GPU
def _translate(text: str, src_lang: str, tgt_lang: str):
    """Translate a single piece of text with a freshly built pipeline.

    Args:
        text: The text to translate.
        src_lang: Key of ``code_mapping`` identifying the source language.
        tgt_lang: Key of ``code_mapping`` identifying the target language.

    Returns:
        The ``"translation_text"`` field of the first pipeline result.

    Raises:
        KeyError: If ``src_lang`` or ``tgt_lang`` is not a key of
            ``code_mapping``.
    """
    # Both lookups happen (source first) before the pipeline is constructed,
    # so an unknown language fails before any pipeline work is done.
    nllb_pipeline = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=code_mapping[src_lang],
        tgt_lang=code_mapping[tgt_lang],
        device=device,
    )
    results = nllb_pipeline(text, max_length=400)
    first_result = results[0]
    return first_result["translation_text"]
def translate(text: str, src_lang: str, tgt_lang: str):
    """Translate ``text`` paragraph by paragraph.

    The input is split on newlines. A paragraph longer than 500 characters is
    split again on full stops and translated one sentence at a time; shorter
    paragraphs are translated in a single call. Blank lines and empty
    sentence fragments (e.g. the one after a trailing ".") are skipped
    instead of being sent to the model.

    Args:
        text: Text to translate; may be empty or ``None``.
        src_lang: Source language, forwarded unchanged to ``_translate``.
        tgt_lang: Target language, forwarded unchanged to ``_translate``.

    Returns:
        The translated paragraphs separated by blank lines, or ``""`` when
        ``text`` holds nothing but whitespace.
    """
    # Nothing to translate: answer immediately without touching the model.
    if not text or not text.strip():
        return ""

    translated_paragraphs = []
    for chunk in text.split("\n"):
        if not chunk.strip():
            # Blank line between paragraphs; no model call needed.
            continue
        if len(chunk) > 500:
            # Too long for one call: translate sentence by sentence and put
            # back the full stop that the split removed.
            sentences = (part.strip() for part in chunk.split("."))
            paragraph = " ".join(
                f"{_translate(sentence, src_lang, tgt_lang)}."
                for sentence in sentences
                if sentence
            )
        else:
            paragraph = _translate(chunk, src_lang, tgt_lang)
        translated_paragraphs.append(paragraph)

    # Joining (rather than appending a separator after every chunk) keeps long
    # and short paragraphs equally separated and leaves no trailing separator.
    return "\n\n".join(translated_paragraphs)
# Markdown rendered below the page title: what NLLB is and the demo's caveats.
description = """
No Language Left Behind (NLLB) is a series of open-source models aiming to provide high-quality translations between 200 languages.
This demo application allows you to use the NLLB model to translate text between a source and target language.
## Notes
- Whilst the model supports 200 languages, the quality of translations may vary between languages.
- "Low Resource" languages (languages which are less present on the internet and have a lower amount of investment) may have lower quality translations.
- The demo is not intended to be used for very long texts.
"""

# Markdown rendered under the "Instructions" heading: the steps to follow.
instructions = """
1. Select the source and target language from the dropdown menus.
2. Enter the text you would like to translate.
3. Click the 'Translate text' button.
"""
# Page layout, top to bottom: title and help text, then one row each for the
# language dropdowns, the input box, the button and the output box.
with gr.Blocks() as demo:
    gr.Markdown("# No Language Left Behind (NLLB) Translation Demo")
    gr.Markdown(description)
    gr.Markdown("## Instructions")
    gr.Markdown(instructions)

    with gr.Row():
        src_lang = gr.Dropdown(choices=flores_codes, label="Source Language")
        target_lang = gr.Dropdown(choices=flores_codes, label="Target Language")

    with gr.Row():
        input_text = gr.Textbox(lines=6, label="Input Text")

    with gr.Row():
        btn = gr.Button("Translate text")

    with gr.Row():
        output = gr.Textbox(lines=6, label="Output Text")

    # Clicking the button runs translate(text, source, target) and writes the
    # returned string into the output box.
    btn.click(
        fn=translate,
        inputs=[input_text, src_lang, target_lang],
        outputs=output,
    )

demo.launch()