|
import html

import gradio as gr
from transformers import pipeline, AutoTokenizer
|
|
|
|
|
# Model identifier shared by the tokenizer and the pipeline below.
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"

# The tokenizer is loaded explicitly so it can be handed to the pipeline.
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# NOTE(review): trust_remote_code=True lets transformers run code that ships
# with the model repository; presumably required for the custom "generic-ner"
# task -- confirm, and consider pinning a revision.
# Inference is explicitly placed on the CPU.
ner_pipeline = pipeline(
    "generic-ner",
    model=MODEL_NAME,
    tokenizer=ner_tokenizer,
    trust_remote_code=True,
    device="cpu",
)
|
|
|
|
|
def format_entities_as_html(entities):
    """Render entity dictionaries as an HTML fragment.

    Every entity becomes one ``<div>`` containing a
    ``<strong>Key:</strong> value<br>`` line per field. The positional keys
    ``start``, ``end`` and ``index`` are omitted, and ``float`` values are
    shown with two decimals.

    Keys and values are HTML-escaped: entity fields can carry text copied
    from user input, and unescaped markup would be injected into the page.

    Args:
        entities: Iterable of mappings (one per entity).

    Returns:
        str: The markup, wrapped in a single outer ``<div>``. An empty
        iterable yields ``"<div></div>"``.
    """
    excluded_keys = {"start", "end", "index"}

    # Collect fragments and join once instead of repeated string `+=`.
    parts = ["<div>"]
    for entity in entities:
        parts.append("<div style='margin-bottom: 10px;'>")
        for key, value in entity.items():
            if key in excluded_keys:
                continue
            label = html.escape(str(key).capitalize())
            if isinstance(value, float):
                # Purely numeric output; nothing to escape.
                rendered = f"{value:.2f}"
            else:
                rendered = html.escape(str(value))
            parts.append(f"<strong>{label}:</strong> {rendered}<br>")
        parts.append("</div>")
    parts.append("</div>")
    return "".join(parts)
|
|
|
|
|
|
|
def extract_entities(sentence):
    """Run the NER pipeline on *sentence* and shape the output for display.

    Each pipeline result is expected to carry ``lOffset``, ``rOffset`` and
    ``type`` keys (they are read unconditionally); ``title``, ``name`` and
    ``function`` are optional and, when present, are appended to the label.
    Results sharing the same ``(lOffset, rOffset)`` span are reported once,
    keeping the first occurrence. The result dictionaries are updated in
    place with ``start``, ``end``, ``surface`` and ``entity`` keys.

    Args:
        sentence: Text to analyse.

    Returns:
        dict: ``{"text": sentence, "entities": [...]}``. For empty or
        whitespace-only input the model is not called and the entity list
        is empty.
    """
    # Nothing to tag: skip the model call for missing / blank input.
    if not sentence or not sentence.strip():
        return {"text": sentence or "", "entities": []}

    results = ner_pipeline(sentence)

    print(f"Original text: {sentence}")
    print("Results:", results)

    entities = []
    seen_spans = set()

    for entity in results:
        # Mirror the pipeline offsets under the start/end key names.
        entity["start"] = entity["lOffset"]
        entity["end"] = entity["rOffset"]
        entity_span = (entity["start"], entity["end"])

        # Keep only the first result reported for a given span.
        if entity_span in seen_spans:
            continue
        seen_spans.add(entity_span)

        entity["surface"] = sentence[entity["start"]:entity["end"]].strip()

        # Label: the type, followed by whichever optional components exist.
        label_parts = [f"{entity['type']}"]
        for key, caption in (
            ("title", "Title"),
            ("name", "Name"),
            ("function", "Function"),
        ):
            if key in entity:
                label_parts.append(f"{caption}: {entity[key]}")
        entity["entity"] = " - ".join(label_parts)

        entities.append(entity)

    print(f"Entities: {entities}")

    return {"text": sentence, "entities": entities}
|
|
|
|
|
|
|
def ner_app_interface(share=True):
    """Build the Gradio demo around ``extract_entities`` and launch it.

    The interface takes a multi-line text box as input and shows the result
    in a ``HighlightedText`` component. A set of French and English example
    sentences is offered, and ``live=False`` is passed to the interface.

    Args:
        share: Forwarded to ``interface.launch(share=...)``. Defaults to
            ``True``, matching the previous hard-coded value.
    """
    input_sentence = gr.Textbox(
        lines=5, label="Input Sentence", placeholder="Enter a sentence for NER:"
    )

    interface = gr.Interface(
        fn=extract_entities,
        inputs=input_sentence,
        outputs=[gr.HighlightedText(label="Text with mentions")],
        title="Named Entity Recognition",
        description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
        examples=[
            [
                "Des chercheurs de l'Université de Cambridge ont développé une nouvelle technique de calcul quantique qui promet d'augmenter exponentiellement les vitesses de calcul."
            ],
            [
                "Le rapport complet sur ces découvertes a été publié dans la prestigieuse revue 'Nature Physics'. (Reuters)"
            ],
            ["In the year 1789, the Estates-General was convened in France."],
            [
                "The event was held at the Palace of Versailles, a symbol of French monarchy."
            ],
            [
                "At Versailles, Marie Antoinette, the Queen of France, was involved in discussions."
            ],
            [
                "Maximilien Robespierre, a leading member of the National Assembly, also participated."
            ],
            [
                "Jean-Jacques Rousseau, the famous philosopher, was a significant figure in the debate."
            ],
            [
                "Another important participant was Charles de Talleyrand, the Bishop of Autun."
            ],
            [
                "Meanwhile, across the Atlantic, George Washington, the first President of the United States, was shaping policies."
            ],
            [
                "Thomas Jefferson, the nation's Secretary of State, played a key role in drafting policies for the new American government."
            ],
        ],
        live=False,
    )

    interface.launch(share=share)
|
|
|
|
|
|
|
# Start the demo only when this file is run as a script, not when imported.
if __name__ == "__main__":
    ner_app_interface()
|
|