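# File-viewer residue from the page this source was copied from, kept as
# comments so that the module parses:
# emanuelaboros's picture | update app | 1a30cc1 | raw | history blame | 5.87 kB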
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import requests
# Load the tokenizer and the seq2seq model once, at import time, so every
# request handled by the app reuses the same objects.
_MODEL_ID = "impresso-project/nel-hipe-multilingual"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)

model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_ID)
# The app only runs inference, so switch the model to evaluation mode.
model = model.eval()

print("Model loaded successfully!")
def get_wikipedia_page_props(input_str: str):
    """
    Look up the Wikidata QID of a Wikipedia page.

    The page is queried through the MediaWiki API of the Wikipedia edition
    named in the input (``action=query`` with ``prop=pageprops``). The lookup
    is best-effort: request failures, HTTP errors and unexpected response
    shapes are reported on stdout and yield "NIL" instead of raising.

    Args:
        input_str (str): The input string in the format "page_name >> language",
            where ``language`` is used as the Wikipedia subdomain
            (``https://{language}.wikipedia.org``).

    Returns:
        str: The QID, "NIL" if no QID could be found, or the message
        "Invalid input format. Use 'page_name >> language'." when
        ``input_str`` does not contain the " >> " separator.
    """
    # Split on the LAST separator so a title that itself contains " >> " is
    # still parsed instead of being rejected as malformed.
    parts = input_str.rsplit(" >> ", 1)
    if len(parts) != 2:
        return "Invalid input format. Use 'page_name >> language'."
    page_name = parts[0].strip()
    language = parts[1].strip()

    qid = "NIL"

    # `language` is interpolated into the request host name, so only accept
    # ASCII letters, digits and hyphens. This keeps a malformed or hostile
    # value (e.g. one containing "/" or ".") from redirecting the request to
    # another host, and also rejects an empty language.
    if not (language.isascii() and language.replace("-", "").isalnum()):
        return qid

    wikipedia_url = f"https://{language}.wikipedia.org/w/api.php"
    wikipedia_params = {
        "action": "query",
        "prop": "pageprops",
        "format": "json",
        "titles": page_name,
    }

    try:
        # A timeout keeps a stalled connection from blocking the app forever.
        response = requests.get(wikipedia_url, params=wikipedia_params, timeout=10)
        response.raise_for_status()
        pages = response.json().get("query", {}).get("pages", {})
        # Only one title is requested, so only the first page entry matters.
        page = next(iter(pages.values()), {})
        return page.get("pageprops", {}).get("wikibase_item", qid)
    except Exception as exc:
        # Deliberately broad: this lookup must never crash the UI. Report the
        # failure instead of swallowing it silently.
        print(f"Wikipedia lookup failed for {page_name!r} ({language}): {exc}")
        return qid
def get_wikipedia_title(qid, language="en"):
    """
    Fetch the title and URL of the Wikipedia page linked to a Wikidata entity.

    Calls the Wikidata ``wbgetentities`` API, restricted to the sitelink of
    the ``{language}wiki`` site.

    Args:
        qid: Wikidata entity id. "NIL" or an empty value returns the
            not-found result without any network request.
        language (str): Language prefix of the wiki whose sitelink is read.

    Returns:
        tuple: ``(title, url)`` of the linked page, or ``("NIL", "None")``
        when the entity has no such sitelink or the request fails.
    """
    not_found = ("NIL", "None")

    # Nothing to look up: skip the round trip for unresolved entities.
    if not qid or qid == "NIL":
        return not_found

    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbgetentities",
        "format": "json",
        "ids": qid,
        "props": "sitelinks/urls",
        "sitefilter": f"{language}wiki",
    }

    try:
        # A timeout keeps a stalled connection from blocking the app forever.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Best-effort like the QID lookup: report and fall back, don't crash.
        print(f"Wikidata lookup failed for {qid!r}: {exc}")
        return not_found

    try:
        sitelink = data["entities"][qid]["sitelinks"][f"{language}wiki"]
        return sitelink["title"], sitelink["url"]
    except (KeyError, TypeError):
        # Unknown entity, API error payload, or no sitelink for this wiki.
        return not_found
def disambiguate_sentence(sentence):
    """
    Link the marked entity in a sentence and render the result as HTML.

    The sentence is passed to the seq2seq model (beam search, 5 beams, up to
    30 new tokens). The first decoded sequence is resolved to a QID with
    ``get_wikipedia_page_props`` and then to a Wikipedia title and URL with
    ``get_wikipedia_title``.

    Args:
        sentence (str): Input text; the UI asks for the entity to be wrapped
            in ``[START]`` / ``[END]``.

    Returns:
        str: An HTML snippet describing the linked entity, or
        "No entity found." when no QID could be resolved.
    """
    from html import escape  # stdlib; imported locally to keep the block self-contained

    # Generate model outputs for the sentence
    outputs = model.generate(
        **tokenizer([sentence], return_tensors="pt"),
        num_beams=5,
        num_return_sequences=5,
        max_new_tokens=30,
    )
    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    print(f"Decoded: {decoded}")

    # Only the first returned sequence is used (assumed to hold the entity
    # in "page_name >> language" form).
    wikipedia_name = decoded[0]
    qid = get_wikipedia_page_props(wikipedia_name)
    print(f"QID: {qid}")

    # Accept only a well-formed QID ("Q" followed by digits). This covers
    # "NIL" as well as any other non-QID result of the lookup (e.g. an error
    # message), and it is checked BEFORE the title lookup so that no request
    # is made for an unresolved entity.
    if not (isinstance(qid, str) and qid[:1] == "Q" and qid[1:].isdigit()):
        return "No entity found."

    # Get Wikipedia title and URL
    title, url = get_wikipedia_title(qid)

    # Title and URL come from an external API: escape them before embedding
    # them in HTML. Emit the link only when a URL is actually available
    # ("None" is the not-found placeholder) to avoid a dead link.
    link_html = ""
    if url and url != "None":
        link_html = f'<a href="{escape(str(url))}" target="_blank">Wikipedia Page</a>'

    # Create an HTML output with a clickable link
    entity_info = f"""
    <div>
        <strong>Entity:</strong> {escape(str(title))} <br>
        <strong>QID:</strong> {escape(qid)} <br>
        {link_html}
    </div>
    """
    return entity_info
def nel_app_interface():
    """
    Build the Gradio interface for entity linking and launch it.

    The interface has one multi-line text input, one HTML output rendered by
    ``disambiguate_sentence``, and a list of clickable example sentences.
    """
    input_sentence = gr.Textbox(
        lines=5,
        label="Input Sentence",
        placeholder="Enter your sentence here in the following format: // << We are going to [START] Paris [END]. >>"
        " // This format ensures that the model knows which entities to disambiguate, more exactly the "
        "entity should be surrounded by `[START]` and `[END]`. // "
        "!Only one entity per sentence is supported at the moment!",
    )
    output_entities = gr.HTML(label="Linked Entity")

    # One entry per example sentence. Adjacent string literals are joined by
    # Python, so a sentence may span two source lines.
    example_sentences = [
        "Des chercheurs de l' [START] Université de Cambridge [END] ont développé une nouvelle technique de calcul "
        "quantique qui promet d'augmenter exponentiellement les vitesses de calcul.",
        "Le rapport complet sur ces découvertes a été publié dans la prestigieuse revue 'Nature Physics'. ([START] "
        "Reuters [END])",
        "In the [START] year 1789 [END], the Estates-General was convened in France.",
        "[START] King Louis XVI, ruler of France [END], called for the meeting.",
        "The event was held at the [START] Palace of Versailles [END], a symbol of French monarchy.",
        # NOTE(review): the next two sentences carry no [START]/[END] markers;
        # confirm they are intended as examples.
        "At Versailles, Marie Antoinette, the Queen of France, was involved in discussions.",
        "Maximilien Robespierre, a leading member of the National Assembly, also participated.",
        "[START] Jean-Jacques Rousseau, the famous philosopher [END], was a significant figure in the debate.",
        "Another important participant was [START] Charles de Talleyrand, the Bishop of Autun [END].",
        "Meanwhile, across the Atlantic, [START] George Washington, the first President of the United States [END], "
        "was shaping policies.",
        "[START] Thomas Jefferson, the nation's Secretary of State [END], played a key role in drafting policies for "
        "the new American government.",
    ]

    # Interface definition
    interface = gr.Interface(
        fn=disambiguate_sentence,
        inputs=input_sentence,
        outputs=output_entities,
        title="Entity Linking with impresso-project/nel-hipe-multilingual",
        description="Link entities using the `impresso-project/nel-hipe-multilingual` model under the hood!",
        # Each example row holds one value per input component. The interface
        # has a single input, so every sentence must be its own row; putting
        # all sentences into one row would define a single example with
        # surplus columns.
        examples=[[sentence] for sentence in example_sentences],
    )
    interface.launch()
# Build and launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    nel_app_interface()