File size: 3,968 Bytes
06e9286 048754e 06e9286 6627fc9 06e9286 048754e 06e9286 aeeec0d 06e9286 94113a9 6627fc9 048754e 6627fc9 048754e 1a30cc1 ac76025 789d78c a5412ea f4e99e2 1a30cc1 06e9286 94113a9 ac76025 94113a9 048754e 94113a9 ac76025 74ff55c 842dbe5 ac76025 682e72c 842dbe5 94113a9 ac76025 8458bdc ac76025 a5412ea ac76025 94113a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import gradio as gr
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import requests
# Hugging Face Hub id of the entity-linking model
# (https://huggingface.co/impresso-project/nel-mgenre-multilingual).
NEL_MODEL_NAME = "impresso-project/nel-mgenre-multilingual"

# Load the tokenizer from the same constant the pipeline uses, so the
# tokenizer and the model can never point at different checkpoints.
nel_tokenizer = AutoTokenizer.from_pretrained(NEL_MODEL_NAME)

# NOTE(review): "generic-nel" is not a built-in transformers task name;
# presumably it is registered by the model repository's custom code, which
# would be why trust_remote_code=True is passed — confirm against the model card.
nel_pipeline = pipeline(
    "generic-nel",
    model=NEL_MODEL_NAME,
    tokenizer=nel_tokenizer,
    trust_remote_code=True,
    device="cpu",
)
print("Model loaded successfully!")
def disambiguate_sentence(sentence):
    """Link the entity marked in *sentence* and render the result as HTML.

    The sentence is passed unchanged to the module-level ``nel_pipeline``.
    Only the first item of the pipeline's result is rendered.

    Args:
        sentence: Text typed by the user into the Gradio textbox.

    Returns:
        An HTML ``<div>`` snippet showing the entity surface form, its
        Wikidata QID, its Wikipedia title and a link to the Wikipedia page,
        or a short message when the pipeline returns no result.

    Raises:
        KeyError: If the first result lacks one of the keys ``surface``,
            ``wkd_id``, ``wkpedia_pagename`` or ``wkpedia_url``.
    """
    # Local import keeps this block self-contained; html is standard library.
    import html

    # NOTE(review): assumes the pipeline returns a sequence of dict-like
    # results — confirm against the model's custom pipeline code.
    results = nel_pipeline(sentence)
    if not results:
        # Previously an empty result crashed with IndexError on results[0].
        return "<div>No entity could be linked in this sentence.</div>"
    linked_entity = results[0]

    def _escaped(key):
        # The values echo user-supplied text (surface form) or come from the
        # model, so escape them before embedding in HTML; quote=True also
        # makes the value safe inside the href attribute.
        return html.escape(str(linked_entity[key]), quote=True)

    surface = _escaped("surface")
    wikidata_id = _escaped("wkd_id")
    page_title = _escaped("wkpedia_pagename")
    page_url = _escaped("wkpedia_url")

    # Create an HTML output with a clickable link
    return (
        "<div>\n"
        f"<strong>Entity:</strong> {surface} <br>\n"
        f"<strong>Wikidata QID:</strong> {wikidata_id} <br>\n"
        f"<strong>Wikipedia Title:</strong> {page_title} <br>\n"
        f'<a href="{page_url}" target="_blank">Wikipedia Page</a>\n'
        "</div>\n"
    )
def nel_app_interface():
    """Build the Gradio demo for entity linking and launch it.

    The UI consists of one multi-line textbox as input and one HTML panel as
    output, wired to ``disambiguate_sentence``. The title and description
    take the model name from ``NEL_MODEL_NAME`` so the text shown to the user
    always matches the model that is actually loaded.

    Returns:
        None. The function ends by calling ``interface.launch()``.
    """
    input_sentence = gr.Textbox(
        lines=5,
        label="Input Sentence",
        placeholder="Enter your sentence here:",
    )
    output_entities = gr.HTML(label="Linked Entities:")

    # The description embeds HTML: the example uses <i>…</i> (the former
    # <it> is not an HTML tag) and the warning closes its <b> tag, which was
    # previously left open.
    description = (
        f"Link entities using the `{NEL_MODEL_NAME}` model under the hood! "
        "We recommend using shorter texts (ie sentences, not full paragraphs). <br>"
        "The sentences in the following format: <br>"
        "<i><< We are going to `[START]` Paris `[END]` >></i> <br>"
        "This format ensures that the model knows which entities to disambiguate, more exactly the "
        "entity should be surrounded by `[START]` and `[END]`. <br> <br>"
        "<b>Warning</b>: Only one entity per sentence is supported at the moment!"
    )

    # Each example is a one-element list because the interface has a single
    # input component.
    examples = [
        [
            "Des chercheurs de l' [START] Université de Cambridge [END] ont développé une nouvelle technique de calcul quantique qui promet d'augmenter exponentiellement les vitesses de calcul."
        ],
        [
            "Le rapport complet sur ces découvertes a été publié dans la prestigieuse revue 'Nature Physics'. ([START] Reuters [END])"
        ],
        [
            "In the [START] year 1789 [END], the Estates-General was convened in France."
        ],
        [
            "The event was held at the [START] Palace of Versailles [END], a symbol of French monarchy."
        ],
        [
            "At Versailles, [START] Antoinette, the Queen of France [END], was involved in discussions."
        ],
        [
            "[START] Maximilien Robespierre [END], a leading member of the National Assembly, also participated."
        ],
        [
            "[START] Jean-Jacques Rousseau, the famous philosopher [END], was a significant figure in the debate."
        ],
        [
            "Another important participant was [START] Charles de Talleyrand, the Bishop of Autun [END]."
        ],
        [
            "Meanwhile, across the Atlantic, [START] George Washington, the first President of the United States [END], was shaping policies."
        ],
        [
            "[START] Thomas Jefferson, the nation's Secretary of State [END], played a key role in drafting policies for the new American government."
        ],
    ]

    # Interface definition
    interface = gr.Interface(
        fn=disambiguate_sentence,
        inputs=input_sentence,
        outputs=output_entities,
        title=f"Entity Linking with {NEL_MODEL_NAME}",
        description=description,
        examples=examples,
    )
    interface.launch()
# Start the demo only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    nel_app_interface()
|