File size: 3,968 Bytes
06e9286
048754e
06e9286
6627fc9
06e9286
048754e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
06e9286
aeeec0d
 
06e9286
94113a9
6627fc9
048754e
6627fc9
048754e
1a30cc1
 
ac76025
789d78c
a5412ea
 
f4e99e2
1a30cc1
 
 
06e9286
 
94113a9
 
 
 
ac76025
94113a9
048754e
94113a9
 
 
 
 
 
 
ac76025
74ff55c
 
842dbe5
ac76025
682e72c
842dbe5
94113a9
 
ac76025
 
 
 
 
 
 
 
 
 
 
 
8458bdc
ac76025
 
a5412ea
ac76025
 
 
 
 
 
 
 
 
 
 
 
 
94113a9
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import gradio as gr
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import requests


# Hugging Face Hub id of the entity-linking model used by this demo:
# https://huggingface.co/impresso-project/nel-mgenre-multilingual
NEL_MODEL_NAME = "impresso-project/nel-mgenre-multilingual"

# Load the tokenizer from the same id as the model (single source of truth),
# so changing NEL_MODEL_NAME can never leave tokenizer and model mismatched.
nel_tokenizer = AutoTokenizer.from_pretrained(NEL_MODEL_NAME)

# NOTE(review): "generic-nel" is presumably a custom pipeline task shipped
# with the model repository, which is why trust_remote_code=True is passed;
# confirm against the model card. Inference is pinned to the CPU.
nel_pipeline = pipeline(
    "generic-nel",
    model=NEL_MODEL_NAME,
    tokenizer=nel_tokenizer,
    trust_remote_code=True,
    device="cpu",
)

print("Model loaded successfully!")


def disambiguate_sentence(sentence):
    """Link the marked entity in ``sentence`` and render it as an HTML snippet.

    The sentence is passed to the module-level ``nel_pipeline``; the first
    result it returns is rendered with its surface form, Wikidata QID,
    Wikipedia title and a link to the Wikipedia page.

    Args:
        sentence: Text entered by the user. The demo expects the entity to be
            wrapped in ``[START]`` / ``[END]`` markers.

    Returns:
        An HTML string: the entity card on success, or a short notice when
        the input is blank or the pipeline returns no result.

    Raises:
        KeyError: If the first pipeline result lacks one of the keys
            ``surface``, ``wkd_id``, ``wkpedia_pagename`` or ``wkpedia_url``.
    """
    # Function-scope import keeps this block self-contained.
    import html

    # Do not run the model on blank input; tell the user what is expected.
    if not sentence or not sentence.strip():
        return (
            "<div><em>Please enter a sentence with the entity wrapped in "
            "[START] and [END].</em></div>"
        )

    # Generate model outputs for the sentence
    linked_entities = nel_pipeline(sentence)

    # Guard the [0] access below: an empty result would raise IndexError.
    if not linked_entities:
        return "<div><em>No entity could be linked in this sentence.</em></div>"

    # Only the first result is shown (one entity per sentence is supported).
    linked_entity = linked_entities[0]

    def escaped(key):
        # The values originate from user text / model output, so escape them
        # (including quotes, for the href attribute) before embedding in HTML.
        return html.escape(str(linked_entity[key]), quote=True)

    # Create an HTML output with a clickable link
    entity_info = f"""<div>
        <strong>Entity:</strong> {escaped('surface')} <br>
        <strong>Wikidata QID:</strong> {escaped('wkd_id')} <br>
        <strong>Wikipedia Title:</strong> {escaped('wkpedia_pagename')} <br>
        <a href="{escaped('wkpedia_url')}" target="_blank" rel="noopener noreferrer">Wikipedia Page</a>
    </div>
    """
    return entity_info


def nel_app_interface():
    """Build the Gradio demo for entity linking and launch it.

    The UI is a multi-line textbox for the input sentence and an HTML panel
    that shows what ``disambiguate_sentence`` returns, plus a set of
    clickable example sentences. The call ends with ``interface.launch()``.
    """
    input_sentence = gr.Textbox(
        lines=5,
        label="Input Sentence",
        placeholder="Enter your sentence here:",
    )
    output_entities = gr.HTML(label="Linked Entities:")

    # Example inputs; each has exactly one entity wrapped in [START] / [END].
    example_sentences = [
        "Des chercheurs de l' [START] Université de Cambridge [END] ont développé une nouvelle technique de calcul quantique qui promet d'augmenter exponentiellement les vitesses de calcul.",
        "Le rapport complet sur ces découvertes a été publié dans la prestigieuse revue 'Nature Physics'. ([START] Reuters [END])",
        "In the [START] year 1789 [END], the Estates-General was convened in France.",
        "The event was held at the [START] Palace of Versailles [END], a symbol of French monarchy.",
        "At Versailles, [START] Antoinette, the Queen of France [END], was involved in discussions.",
        "[START] Maximilien Robespierre [END], a leading member of the National Assembly, also participated.",
        "[START] Jean-Jacques Rousseau, the famous philosopher [END], was a significant figure in the debate.",
        "Another important participant was [START] Charles de Talleyrand, the Bishop of Autun [END].",
        "Meanwhile, across the Atlantic, [START] George Washington, the first President of the United States [END], was shaping policies.",
        "[START] Thomas Jefferson, the nation's Secretary of State [END], played a key role in drafting policies for the new American government.",
    ]

    # Interface definition.
    # The title and description interpolate NEL_MODEL_NAME so the UI always
    # names the model that is actually loaded (the previous text referred to
    # a different model id). The markup fixes are <it> -> <i> and closing the
    # <b> tag around "Warning".
    interface = gr.Interface(
        fn=disambiguate_sentence,
        inputs=input_sentence,
        outputs=output_entities,
        title=f"Entity Linking with {NEL_MODEL_NAME}",
        description=f"Link entities using the `{NEL_MODEL_NAME}` model under the hood!  "
        "We recommend using shorter texts (ie sentences, not full paragraphs). <br>"
        "The sentences in the following format: <br>"
        "<i><< We are going to `[START]` Paris `[END]` >></i> <br>"
        "This format ensures that the model knows which entities to disambiguate, more exactly the "
        "entity should be surrounded by `[START]` and `[END]`. <br> <br>"
        "<b>Warning</b>: Only one entity per sentence is supported at the moment!",
        # gr.Interface takes one list of input values per example row.
        examples=[[text] for text in example_sentences],
    )

    interface.launch()


# Script entry point: build and launch the Gradio demo only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    nel_app_interface()