|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
import requests |
|
|
|
# Load the tokenizer and the seq2seq entity-linking checkpoint once at import
# time so that every request served by the app reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained("impresso-project/nel-hipe-multilingual")

model = AutoModelForSeq2SeqLM.from_pretrained("impresso-project/nel-hipe-multilingual")
# The app only runs inference, so switch the model to evaluation mode.
model = model.eval()

print("Model loaded successfully!")
|
|
|
|
|
def get_wikipedia_title(qid, language="en"):
    """Look up the Wikipedia article linked to a Wikidata item.

    Queries the Wikidata ``wbgetentities`` API for the ``{language}wiki``
    sitelink of ``qid``.

    Args:
        qid: Wikidata item identifier such as ``"Q90"``.
        language: Wikipedia language code used to build the site key
            (``"en"`` becomes ``"enwiki"``).

    Returns:
        A ``(title, url)`` tuple of strings, or ``("NIL", "None")`` when
        ``qid`` is not shaped like an item id or the response contains no
        sitelink for the requested wiki.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    not_found = ("NIL", "None")

    # Anything that is not "Q" followed by digits (for example a "NIL"
    # prediction) cannot be resolved to an item, so skip the network call.
    if not (isinstance(qid, str) and qid.startswith("Q") and qid[1:].isdigit()):
        return not_found

    site = f"{language}wiki"
    response = requests.get(
        "https://www.wikidata.org/w/api.php",
        params={
            "action": "wbgetentities",
            "format": "json",
            "ids": qid,
            "props": "sitelinks/urls",
            "sitefilter": site,
        },
        # Without a timeout a stalled connection would block the caller forever.
        timeout=10,
    )
    data = response.json()

    try:
        sitelink = data["entities"][qid]["sitelinks"][site]
        return sitelink["title"], sitelink["url"]
    except (KeyError, TypeError):
        # KeyError: API error payload, unknown entity or no sitelink for this
        # wiki. TypeError: defensive, in case a level that should be a mapping
        # comes back as a list or null.
        return not_found
|
|
|
|
|
def disambiguate_sentence(sentence):
    """Link the marked mention in ``sentence`` to a Wikidata entry.

    The sentence is fed to the module-level ``model``; the last
    whitespace-separated token of the first decoded sequence is taken as the
    QID and resolved to a Wikipedia title and URL via
    ``get_wikipedia_title``.

    Args:
        sentence: Input text. The UI asks for the mention to be wrapped in
            ``[START]`` and ``[END]``.

    Returns:
        A dict with the original ``"text"`` and an ``"entities"`` list that
        holds one formatted ``"QID: ..., Title: ..., URL: ..."`` string.
    """
    encoded = tokenizer([sentence], return_tensors="pt")
    outputs = model.generate(
        **encoded,
        num_beams=5,
        num_return_sequences=5,
        max_new_tokens=30,
    )
    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # Only the first returned sequence is used. If it decodes to an empty
    # string (e.g. nothing but special tokens) there is no token to pick, so
    # fall back to "NIL" instead of raising IndexError.
    tokens = decoded[0].split() if decoded else []
    qid = tokens[-1] if tokens else "NIL"

    title, url = get_wikipedia_title(qid)
    entities = [f"QID: {qid}, Title: {title}, URL: {url}"]

    print(f"Entities: {entities}")
    return {"text": sentence, "entities": entities}
|
|
|
|
|
def nel_app_interface():
    """Build the Gradio interface for the entity linker and launch it.

    The interface has one multi-line text input, one text output and a set of
    clickable example sentences, and calls ``disambiguate_sentence`` on
    submit.
    """
    input_sentence = gr.Textbox(
        lines=5,
        label="Input Sentence",
        placeholder="Enter your sentence here in the following format: // << We are going to [START] Paris [END]. >>"
        " // This format ensures that the model knows which entities to disambiguate, more exactly the "
        "entity should be surrounded by `[START]` and `[END]`. // "
        "!Only one entity per sentence is supported at the moment!",
    )
    output_entities = gr.Textbox(label="Linked Entities")

    # The interface has a single input component, so every example must be its
    # own one-element row; putting all sentences in one row would treat them
    # as values for eleven different inputs.
    examples = [
        [
            "Des chercheurs de l' [START] Université de Cambridge [END] ont développé une nouvelle technique de calcul "
            "quantique qui promet d'augmenter exponentiellement les vitesses de calcul."
        ],
        [
            "Le rapport complet sur ces découvertes a été publié dans la prestigieuse revue 'Nature Physics'. ([START] "
            "Reuters [END])"
        ],
        ["In the [START] year 1789 [END], the Estates-General was convened in France."],
        ["[START] King Louis XVI, ruler of France [END], called for the meeting."],
        ["The event was held at the [START] Palace of Versailles [END], a symbol of French monarchy."],
        # NOTE(review): the next two sentences carry no [START]/[END] markers;
        # kept unchanged from the original list.
        ["At Versailles, Marie Antoinette, the Queen of France, was involved in discussions."],
        ["Maximilien Robespierre, a leading member of the National Assembly, also participated."],
        ["[START] Jean-Jacques Rousseau, the famous philosopher [END], was a significant figure in the debate."],
        ["Another important participant was [START] Charles de Talleyrand, the Bishop of Autun [END]."],
        [
            "Meanwhile, across the Atlantic, [START] George Washington, the first President of the United States [END], "
            "was shaping policies."
        ],
        [
            "[START] Thomas Jefferson, the nation's Secretary of State [END], played a key role in drafting policies for "
            "the new American government."
        ],
    ]

    interface = gr.Interface(
        fn=disambiguate_sentence,
        inputs=input_sentence,
        outputs=output_entities,
        title="Entity Linking with impresso-project/nel-hipe-multilingual",
        description="Link entities using the `impresso-project/nel-hipe-multilingual` model under the hood!",
        examples=examples,
    )

    interface.launch()
|
|
|
|
|
# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    nel_app_interface()
|
|