import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Hub checkpoint id shared by the tokenizer and the seq2seq model.
_CHECKPOINT = "impresso-project/nel-hipe-multilingual"

# Both objects are created once at import time and reused for every request.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
# The model is only used for generation here, so switch it to evaluation mode.
model.eval()


def disambiguate_sentences(sentences):
    """Generate candidate disambiguations for each input sentence.

    Each sentence is expected to mark the entity to disambiguate with
    ``[START]`` and ``[END]`` (the format advertised in the UI placeholder).

    Args:
        sentences: Either a single string, in which case every non-blank
            line is treated as one sentence (this is what a Gradio textbox
            delivers), or an iterable of sentence strings, which is
            processed item by item as given. ``None`` means "no input".

    Returns:
        A list with one entry per processed sentence. Each entry is the list
        of decoded strings produced by beam search for that sentence
        (``num_return_sequences=5``). Empty input yields ``[]``.
    """
    if sentences is None:
        return []

    if isinstance(sentences, str):
        # A str is itself iterable: looping over it directly would send the
        # text to the model one character at a time. Split it into lines and
        # drop blank ones so each remaining line is one sentence.
        sentences = [
            line.strip() for line in sentences.splitlines() if line.strip()
        ]

    results = []
    for sentence in sentences:
        # Sentences are encoded one at a time (batch of size 1).
        encoded = tokenizer([sentence], return_tensors="pt")
        outputs = model.generate(
            **encoded,
            num_beams=5,
            num_return_sequences=5,
        )
        results.append(
            tokenizer.batch_decode(outputs, skip_special_tokens=True)
        )
    return results


# UI components for the demo. `gr.Textbox` is used for both input and output:
# the older `gr.inputs.*` / `gr.outputs.*` namespaces are deprecated and are
# not available in current Gradio releases.
input_sentences = gr.Textbox(
    lines=5,
    label="Input Sentences",
    # Shown while the box is empty; explains the [START]/[END] entity markup.
    placeholder="Enter your sentence here in the following format: \\ `It is reported in [START] Paris [END], "
    "that the opening of the chambers will take place on the 27th January.' \\ "
    "This format ensures that the model knows which entities to disambiguate, more exactly the entity should "
    "be surrounded by `[START]` and `[END]`.",
)

# Read-only box that displays whatever the prediction function returns.
output_predictions = gr.Textbox(label="Predictions")

# Wire the prediction function to the input/output components and start the
# web app as soon as the module is executed.
demo = gr.Interface(
    fn=disambiguate_sentences,
    inputs=input_sentences,
    outputs=output_predictions,
    title="NEL Disambiguation",
)
demo.launch()
|