|
import gradio as gr |
|
from transformers import AutoModelForSeq2SeqLM |
|
from transformers import AutoTokenizer |
|
|
|
# Hub checkpoint fine-tuned for Spanish -> Nahuatl translation.
# Single constant so model and tokenizer can never drift apart.
MODEL_NAME = 'hackathon-pln-es/t5-small-spanish-nahuatl'

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
|
|
def predict(text):
    """Translate one Spanish sentence to Nahuatl.

    Args:
        text: Spanish input string (from the Gradio textbox).

    Returns:
        The decoded Nahuatl translation as a plain string.
    """
    # T5 is a text-to-text model; the task prefix selects the
    # translation direction the checkpoint was fine-tuned on.
    input_ids = tokenizer(
        'translate Spanish to Nahuatl: ' + text, return_tensors='pt'
    ).input_ids
    # NOTE(review): generate() runs with default decoding parameters
    # (greedy search, default max_length) — long inputs may produce
    # truncated translations; confirm this is acceptable for the demo.
    outputs = model.generate(input_ids)
    # batch_decode returns a list; we fed a single sentence, so take
    # the first (only) element.
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
|
|
# Build and launch the Gradio web demo around predict().
# NOTE(review): the gr.inputs / gr.outputs namespaces and the
# enable_queue launch kwarg are Gradio 2.x-era API; they were
# deprecated in 3.x and removed in 4.x. Confirm the pinned gradio
# version before upgrading this file's dependencies.
gr.Interface(

    fn=predict,

    # Single-line textbox for the Spanish source sentence.
    inputs=gr.inputs.Textbox(lines=1, label="Input Text in Spanish"),

    outputs=[

        gr.outputs.Textbox(label="Translated text in Nahuatl"),

    ],

    theme="peach",

    title='🌽 Spanish to Nahuatl Automatic Translation',

    description='This model is a T5 Transformer (t5-small) fine-tuned on 29,007 spanish and nahuatl sentences using 12,890 samples collected from the web and 16,117 samples from the Axolotl dataset. The dataset is normalized using "sep" normalization from py-elotl. For more details visit https://huggingface.co/hackathon-pln-es/t5-small-spanish-nahuatl',

    # Clickable example inputs shown under the interface.
    examples=[

        'hola',

        'conejo',

        'estrella',

        'te quiero mucho',

        'te amo',

        'quiero comer',

        'esto se llama agua',

        'mi abuelo se llama Juan',

        'te amo con todo mi corazón'],

    # Users flag a translation manually; the options below label the
    # flagged record in the flagging log.
    allow_flagging="manual",

    flagging_options=["right translation", "wrong translation", "error", "other"]

).launch(enable_queue=True)
|
|