|
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
# Module-level flag. get_output() declares its own parameter with the same
# name (default True), so nothing visible in this file reads this global.
first_generation = True

# Text prepended to every sentence before tokenization (currently nothing).
prefix = ''

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Checkpoint identifier handed to both from_pretrained() calls below.
model_checkpoint = "fermaat/es_nlp_text_neutralizer"

# Tokenizer and model are loaded once at import time and shared by all requests.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint).to(device)

# NOTE(review): presumably the default length cap that generate() picks up
# from the model config -- confirm against the installed transformers version.
model.config.max_length = 512
|
|
|
|
|
|
|
# Contract "de el" -> "del" only when both are whole words. A plain substring
# replace would also rewrite "de ella" -> "della" and "donde el" -> "dondel".
_DE_EL_RE = re.compile(r"\b([Dd])e el\b")

# Leading/trailing non-word characters (punctuation, quotes) glued to a token.
_EDGE_NONWORD_RE = re.compile(r"^\W+|\W+$")

# A token that closes a sentence: a terminator optionally followed by closers.
_SENTENCE_END_RE = re.compile(r"[.!?…][\"'»”’)\]]*$")


def _words_to_restore(input_sentence):
    """Map lowercase form -> original casing for capitalized input words.

    Words that open a sentence are skipped: their capital letter comes from
    their position, so it says nothing about how the word should be cased
    elsewhere in the output.
    """
    restore = {}
    at_sentence_start = True
    for token in (input_sentence or "").split():
        bare = _EDGE_NONWORD_RE.sub("", token)
        if bare:
            if bare[0].isupper() and not at_sentence_start:
                # First casing seen for a given word wins.
                restore.setdefault(bare.lower(), bare)
            at_sentence_start = False
        if _SENTENCE_END_RE.search(token):
            at_sentence_start = True
    return restore


def postproc(input_sentence, preds):
    """Tidy up the casing and contractions of a decoded model prediction.

    Steps, in order:

    1. Capitalize the prediction when it starts with a lowercase letter.
    2. When the prediction contains a period, split it on every ``'.'``,
       capitalize each non-empty fragment and re-join them as
       ``"Fragment. Fragment."``. Every period is treated as a sentence end,
       so decimals and abbreviations are split as well.
    3. Contract the whole words "de el" / "De el" into "del" / "Del".
    4. ``str.capitalize`` lowercases everything after the first letter, so
       words that were capitalized in ``input_sentence`` (outside sentence
       starts) get their original casing back wherever they appear in the
       prediction as whole words.

    Args:
        input_sentence: The text originally submitted by the user.
        preds: The decoded model output.

    Returns:
        The post-processed prediction. An empty (or ``None``) prediction is
        returned unchanged.
    """
    if not preds:
        return preds

    if preds[0].islower():
        preds = preds.capitalize()

    if '.' in preds:
        # Stripping each fragment keeps the capitalization working even when
        # the period is followed by more than one space.
        fragments = (piece.strip() for piece in preds.split('.'))
        preds = ' '.join(piece.capitalize() + '.' for piece in fragments if piece)

    preds = _DE_EL_RE.sub(r"\g<1>el", preds)

    restore = _words_to_restore(input_sentence)
    if restore:
        # Longest alternatives first so a longer word is never shadowed by a
        # shorter one sharing its prefix. The lookarounds enforce whole-word
        # matches, so "ana" is restored but "banana" is left alone.
        alternatives = "|".join(
            re.escape(word) for word in sorted(restore, key=len, reverse=True)
        )
        whole_word = re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")
        preds = whole_word.sub(lambda match: restore[match.group(0)], preds)

    return preds
|
|
|
|
|
|
|
|
|
def get_output(sentence, first_generation=True):
    """Generate the model's rewrite of ``sentence`` and post-process it.

    Args:
        sentence: Text to rewrite. ``None``, empty or whitespace-only input
            short-circuits to an empty string without touching the model.
        first_generation: When true, ``generate`` is called with only
            ``do_sample=False`` on top of its defaults. When false, the call
            additionally uses ``num_beams=2``, ``repetition_penalty=2.5``
            and ``early_stopping=True``.

    Returns:
        The first generated sequence, decoded without special tokens and
        passed through ``postproc``; ``""`` for blank input.
    """
    # Nothing to rewrite: skip the model call, and avoid handing an empty
    # decode to the post-processing step.
    if not sentence or not sentence.strip():
        return ""

    inputs = tokenizer([prefix + sentence], return_tensors="pt", padding=True)

    # Arguments shared by both decoding strategies.
    generation_kwargs = {
        "input_ids": inputs["input_ids"].to(device),
        "attention_mask": inputs["attention_mask"].to(device),
        "do_sample": False,
    }
    if not first_generation:
        generation_kwargs.update(
            num_beams=2,
            repetition_penalty=2.5,
            early_stopping=True,
        )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output_sequence = model.generate(**generation_kwargs)

    decoded = tokenizer.decode(
        output_sequence[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )
    return postproc(input_sentence=sentence, preds=decoded)
|
|
|
|
|
# Sample inputs passed to the interface as its `examples`.
examples = [
    'De acuerdo con las informaciones anteriores , las alumnas se han quejado de la actitud de los profesores en los exámenes finales. Los representantes estudiantiles son los alumnos Juanju y Javi.',
    'Durante su ingreso no debe tomar agua que no le sea suministrada por los especialistas en su cirujía',
    'Los intermediarios del acuerdo se ocuparán también de saber si la casa tiene las deudas saldadas y si no hay problemas legales pendientes',
]

# One text box in, one text box out, wired to get_output().
# NOTE(review): gr.inputs / gr.outputs look like the legacy component
# namespaces -- confirm the pinned Gradio version before upgrading.
iface = gr.Interface(
    fn=get_output,
    inputs=gr.inputs.Textbox(label="Introduce some Spanish text here"),
    outputs=gr.outputs.Textbox(label="Neutral version of your text"),
    title="Spanish Text Neutralization app",
    description="The purpose of this app is to transform Spanish gendered text into a neutral version, suitable for an unbiased environment",
    examples=examples,
)

# Start serving the app as soon as the module is executed.
iface.launch()