emanuelaboros's picture
update app
6627fc9
raw
history blame
4.07 kB
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import requests
# Hub identifier of the entity-linking checkpoint; the tokenizer and the
# seq2seq model are both loaded from this same repository.
_MODEL_ID = "impresso-project/nel-hipe-multilingual"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_ID)
# Put the model in evaluation mode; it is only used for generation here.
model = model.eval()
print("Model loaded successfully!")
def get_wikipedia_title(qid, language="en"):
    """Look up the Wikipedia page linked to a Wikidata entity.

    Calls the Wikidata ``wbgetentities`` API and reads the sitelink of
    ``qid`` for the ``{language}wiki`` site.

    Args:
        qid: Wikidata entity identifier passed as the ``ids`` parameter.
        language: Language code used to build the site key
            (``"en"`` -> ``"enwiki"``).

    Returns:
        A ``(title, url)`` tuple. ``("NIL", "None")`` is returned when the
        request fails, the response is not valid JSON, or the response
        contains no sitelink for the requested site.
    """
    nil_result = ("NIL", "None")
    api_url = "https://www.wikidata.org/w/api.php"
    site_key = f"{language}wiki"
    params = {
        "action": "wbgetentities",
        "format": "json",
        "ids": qid,
        "props": "sitelinks/urls",
        "sitefilter": site_key,
    }

    try:
        # A timeout keeps a stalled connection from blocking the UI callback
        # forever; requests has no default timeout.
        response = requests.get(api_url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network/HTTP failure or a non-JSON body: treat as "not linked",
        # the same fallback used when the sitelink is missing.
        return nil_result

    try:
        sitelink = data["entities"][qid]["sitelinks"][site_key]
        return sitelink["title"], sitelink["url"]
    except (KeyError, TypeError):
        # Error payloads and entities without this sitelink lack these keys.
        return nil_result
def disambiguate_sentence(sentence):
    """Link the marked entity in ``sentence`` to a Wikidata QID.

    The sentence is tokenized and passed to the module-level ``model`` for
    beam-search generation. The last whitespace-separated token of the first
    decoded sequence is taken as the QID and resolved to a Wikipedia title
    and URL via ``get_wikipedia_title``.

    Args:
        sentence: Input text for the model.

    Returns:
        A dict with the original sentence under ``"text"`` and, under
        ``"entities"``, a one-element list holding a string of the form
        ``"QID: ..., Title: ..., URL: ..."``. When the model output contains
        no tokens, the entry is ``"QID: NIL, Title: NIL, URL: None"``.
    """
    # Generate model outputs for the sentence
    encoded = tokenizer([sentence], return_tensors="pt")
    outputs = model.generate(
        **encoded,
        num_beams=5,
        num_return_sequences=5,
        max_new_tokens=30,
    )
    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # Only the first returned sequence is used; the QID is assumed to be its
    # last token. Guard against an empty decode, which would otherwise raise
    # IndexError on ``split()[-1]``.
    tokens = decoded[0].split() if decoded else []
    if tokens:
        qid = tokens[-1]
        # Get Wikipedia title and URL
        title, url = get_wikipedia_title(qid)
    else:
        # Nothing to look up: report an unlinked entity without an API call.
        qid, title, url = "NIL", "NIL", "None"

    entities = [f"QID: {qid}, Title: {title}, URL: {url}"]
    print(f"Entities: {entities}")
    return {"text": sentence, "entities": entities}
def nel_app_interface():
    """Build and launch the Gradio interface for entity linking.

    The interface has a single multi-line text input, passes it to
    ``disambiguate_sentence`` and shows the result in a text box. This
    function calls ``interface.launch()`` and returns nothing.
    """
    input_sentence = gr.Textbox(
        lines=5,
        label="Input Sentence",
        placeholder="Enter your sentence here in the following format: // << We are going to [START] Paris [END]. >>"
        " // This format ensures that the model knows which entities to disambiguate, more exactly the "
        "entity should be surrounded by `[START]` and `[END]`. // "
        "!Only one entity per sentence is supported at the moment!",
    )
    output_entities = gr.Textbox(label="Linked Entities")

    # One sentence per example. Previously all sentences sat in a single
    # inner list, i.e. one example row with many values for a one-input
    # interface, so they could not be offered as separate examples.
    example_sentences = [
        "Des chercheurs de l' [START] Université de Cambridge [END] ont développé une nouvelle technique de calcul "
        "quantique qui promet d'augmenter exponentiellement les vitesses de calcul.",
        "Le rapport complet sur ces découvertes a été publié dans la prestigieuse revue 'Nature Physics'. ([START] "
        "Reuters [END])",
        "In the [START] year 1789 [END], the Estates-General was convened in France.",
        "[START] King Louis XVI, ruler of France [END], called for the meeting.",
        "The event was held at the [START] Palace of Versailles [END], a symbol of French monarchy.",
        "At Versailles, Marie Antoinette, the Queen of France, was involved in discussions.",
        "Maximilien Robespierre, a leading member of the National Assembly, also participated.",
        "[START] Jean-Jacques Rousseau, the famous philosopher [END], was a significant figure in the debate.",
        "Another important participant was [START] Charles de Talleyrand, the Bishop of Autun [END].",
        "Meanwhile, across the Atlantic, [START] George Washington, the first President of the United States [END], "
        "was shaping policies.",
        "[START] Thomas Jefferson, the nation's Secretary of State [END], played a key role in drafting policies for "
        "the new American government.",
    ]

    # Interface definition
    interface = gr.Interface(
        fn=disambiguate_sentence,
        inputs=input_sentence,
        outputs=output_entities,
        title="Entity Linking with impresso-project/nel-hipe-multilingual",
        description="Link entities using the `impresso-project/nel-hipe-multilingual` model under the hood!",
        # Each row is a single-item list because there is one input component.
        examples=[[sentence] for sentence in example_sentences],
    )
    interface.launch()
if __name__ == "__main__":
    # Start the Gradio app only when this file is run as a script,
    # not when it is imported as a module.
    nel_app_interface()