import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the Impresso multilingual named-entity linking (NEL) model and its tokenizer.
tokenizer = AutoTokenizer.from_pretrained("impresso-project/nel-hipe-multilingual")
model = AutoModelForSeq2SeqLM.from_pretrained(
    "impresso-project/nel-hipe-multilingual"
).eval()


def disambiguate_sentences(text):
    """Disambiguate the tagged entity in each non-empty line of the input textbox."""
    sentences = [line.strip() for line in text.split("\n") if line.strip()]
    results = []
    for sentence in sentences:
        # Beam search with 5 beams, returning the top 5 candidate entity links.
        outputs = model.generate(
            **tokenizer([sentence], return_tensors="pt"),
            num_beams=5,
            num_return_sequences=5,
        )
        decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        results.append(f"{sentence}\n  " + "\n  ".join(decoded))
    return "\n\n".join(results)


# Gradio 3+ components (the old gr.inputs / gr.outputs namespaces are deprecated).
input_sentences = gr.Textbox(
    lines=5,
    label="Input Sentences",
    placeholder=(
        "Enter your sentence here in the following format: "
        "'It is reported in [START] Paris [END], that the opening of the chambers "
        "will take place on the 27th January.' "
        "The entity to disambiguate must be surrounded by [START] and [END] "
        "so that the model knows which mention to link."
    ),
)
output_predictions = gr.Textbox(label="Predictions")

gr.Interface(
    fn=disambiguate_sentences,
    inputs=input_sentences,
    outputs=output_predictions,
    title="NEL Disambiguation",
).launch()
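# Illustrative usage sketch (not part of the original script): the function can be
# exercised without the Gradio UI, e.g. from a Python shell, by passing a string
# that wraps the target mention in [START] ... [END]:
#
#   print(disambiguate_sentences(
#       "It is reported in [START] Paris [END], that the opening of the "
#       "chambers will take place on the 27th January."
#   ))
#
# Each block of output shows the input sentence followed by the five beam-search
# candidates decoded from the model.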