import gradio as gr
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import requests
# Hugging Face Hub identifier of the entity-linking checkpoint, see
# https://huggingface.co/impresso-project/nel-mgenre-multilingual
NEL_MODEL_NAME = "impresso-project/nel-mgenre-multilingual"

# The tokenizer is loaded explicitly so that the very same instance can be
# handed to the pipeline below.
nel_tokenizer = AutoTokenizer.from_pretrained(NEL_MODEL_NAME)

# NOTE(review): "generic-nel" is presumably a custom task shipped with the
# model repository, which would be why trust_remote_code=True is required --
# confirm against the model card. Inference is pinned to the CPU.
nel_pipeline = pipeline(
    "generic-nel",
    model=NEL_MODEL_NAME,
    tokenizer=nel_tokenizer,
    device="cpu",
    trust_remote_code=True,
)

print("Model loaded successfully!")
def disambiguate_sentence(sentence):
    """Link the entity marked in *sentence* and render the result as HTML.

    The sentence is passed unchanged to the module-level ``nel_pipeline`` and
    only the first entity it returns is rendered.

    Args:
        sentence: Text entered by the user, expected to contain one entity
            wrapped in ``[START]`` and ``[END]`` markers.

    Returns:
        An HTML fragment (the result is shown in a ``gr.HTML`` component)
        with the entity surface form, its Wikidata QID, its Wikipedia title
        and a link to the page. A short notice is returned instead when the
        input is blank or the pipeline yields no entity.

    Raises:
        KeyError: If the first pipeline result lacks the ``surface``,
            ``wkd_id`` or ``wkpedia_pagename`` key.
    """
    # Function-scope import keeps the block self-contained.
    import html

    if not sentence or not sentence.strip():
        return "<p>Please enter a sentence containing one [START] ... [END] span.</p>"

    linked_entities = nel_pipeline(sentence)
    if not linked_entities:
        return "<p>No entity could be linked for this sentence.</p>"
    linked_entity = linked_entities[0]

    # Everything below ends up inside HTML, so model/user-derived text is
    # escaped before interpolation.
    surface = html.escape(str(linked_entity["surface"]))
    title = html.escape(str(linked_entity["wkpedia_pagename"]))
    qid_raw = str(linked_entity["wkd_id"])
    qid = html.escape(qid_raw)

    # Only well-formed QIDs ("Q" followed by digits) are turned into a link;
    # any other value is shown as plain text.
    qid_digits = qid_raw[1:]
    if qid_raw[:1] == "Q" and qid_digits.isascii() and qid_digits.isdigit():
        qid_html = (
            f'<a href="https://www.wikidata.org/wiki/{qid}" '
            f'target="_blank" rel="noopener noreferrer">{qid}</a>'
        )
    else:
        qid_html = qid

    # NOTE(review): assumes the result is a dict that may carry a ready-made
    # "wkpedia_url" entry -- confirm against the model card. Without a usable
    # http(s) URL the label is rendered as plain text.
    page_url = str(linked_entity.get("wkpedia_url") or "")
    if page_url.startswith(("http://", "https://")):
        safe_url = html.escape(page_url, quote=True)
        page_html = (
            f'<a href="{safe_url}" target="_blank" '
            'rel="noopener noreferrer">Wikipedia Page</a>'
        )
    else:
        page_html = "Wikipedia Page"

    return (
        "<div>"
        f"<strong>Entity:</strong> {surface}<br>"
        f"<strong>Wikidata QID:</strong> {qid_html}<br>"
        f"<strong>Wikipedia Title:</strong> {title}<br>"
        f"{page_html}"
        "</div>"
    )
def nel_app_interface():
    """Build the Gradio entity-linking demo and launch it.

    The interface wires a multi-line text box to ``disambiguate_sentence``
    and shows the returned HTML. The title and description are derived from
    ``NEL_MODEL_NAME`` so the UI always names the model that is actually
    loaded. The call to ``launch()`` starts the Gradio app.
    """
    input_sentence = gr.Textbox(
        lines=5,
        label="Input Sentence",
        placeholder="Enter your sentence here:",
    )
    output_entities = gr.HTML(label="Linked Entities:")

    # Explicit <br> tags provide the line breaks in the rendered description.
    description = (
        f"Link entities using the `{NEL_MODEL_NAME}` model under the hood! "
        "We recommend using shorter texts (ie sentences, not full paragraphs).<br>"
        "The sentences in the following format:<br>"
        "<< We are going to `[START]` Paris `[END]` >><br>"
        "This format ensures that the model knows which entities to disambiguate, more exactly the "
        "entity should be surrounded by `[START]` and `[END]`.<br>"
        "Warning: Only one entity per sentence is supported at the moment!"
    )

    # One single-input example per row.
    examples = [
        [
            "Des chercheurs de l' [START] Université de Cambridge [END] ont développé une nouvelle technique de calcul quantique qui promet d'augmenter exponentiellement les vitesses de calcul."
        ],
        [
            "Le rapport complet sur ces découvertes a été publié dans la prestigieuse revue 'Nature Physics'. ([START] Reuters [END])"
        ],
        [
            "In the [START] year 1789 [END], the Estates-General was convened in France."
        ],
        [
            "The event was held at the [START] Palace of Versailles [END], a symbol of French monarchy."
        ],
        [
            "At Versailles, [START] Antoinette, the Queen of France [END], was involved in discussions."
        ],
        [
            "[START] Maximilien Robespierre [END], a leading member of the National Assembly, also participated."
        ],
        [
            "[START] Jean-Jacques Rousseau, the famous philosopher [END], was a significant figure in the debate."
        ],
        [
            "Another important participant was [START] Charles de Talleyrand, the Bishop of Autun [END]."
        ],
        [
            "Meanwhile, across the Atlantic, [START] George Washington, the first President of the United States [END], was shaping policies."
        ],
        [
            "[START] Thomas Jefferson, the nation's Secretary of State [END], played a key role in drafting policies for the new American government."
        ],
    ]

    # Interface definition
    interface = gr.Interface(
        fn=disambiguate_sentence,
        inputs=input_sentence,
        outputs=output_entities,
        title=f"Entity Linking with {NEL_MODEL_NAME}",
        description=description,
        examples=examples,
    )
    interface.launch()
# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    nel_app_interface()