|
import gradio as gr |
|
from transformers import pipeline, AutoTokenizer |
|
|
|
|
|
# Hub identifier of the checkpoint used for both the tokenizer and the pipeline.
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"

# The tokenizer is loaded explicitly so the same instance is handed to the
# pipeline below.
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# NOTE(review): "generic-ner" is presumably a custom task shipped with the
# model repository, which is why trust_remote_code=True is passed -- confirm
# that the remote code is trusted before deploying. Inference is pinned to CPU.
ner_pipeline = pipeline(
    task="generic-ner",
    model=MODEL_NAME,
    tokenizer=ner_tokenizer,
    trust_remote_code=True,
    device="cpu",
)
|
|
|
|
|
|
|
def prepare_entities_for_highlight(text, results):
    """Convert NER results into a ``{"text", "entities"}`` highlight payload.

    Args:
        text: The string the entities were extracted from.
        results: Mapping of category name to a list of entity dicts. Each
            entity dict must provide ``"start"``, ``"end"``, ``"score"`` and
            ``"entity"``. An empty mapping or ``None`` yields no entities.

    Returns:
        A dict with ``"text"`` (the unchanged input) and ``"entities"``, a
        list sorted by ``"start"`` holding one dict per distinct
        ``(start, end)`` span. Each dict has the keys ``"text"``, ``"score"``,
        ``"start"``, ``"end"``, ``"label"`` and ``"entity"``.
    """
    entities = []
    seen_spans = set()

    print(f"Original text: {text}")

    # Only the entity lists matter here; the category keys are not used.
    for entity_list in (results or {}).values():
        for entity in entity_list:
            start, end = entity["start"], entity["end"]
            entity_span = (start, end)

            # The first entity seen for a span wins; later ones are dropped.
            if entity_span in seen_spans:
                continue
            seen_spans.add(entity_span)

            entity_text = text[start:end].strip()
            print(
                f"Entity text: {entity_text}, Start: {start}, End: {end}, Type: {entity['entity']}"
            )
            entities.append(
                {
                    "text": entity_text,
                    "score": entity["score"],
                    "start": start,
                    "end": end,
                    "label": entity["entity"],
                    # Gradio's HighlightedText documents "entity" (or
                    # "entity_group") as the category key of its dict format;
                    # "label" alone would leave the highlight uncategorised.
                    # "label" is kept for existing consumers.
                    "entity": entity["entity"],
                }
            )

    # Stable sort: entities sharing a start offset keep their discovery order.
    entities.sort(key=lambda item: item["start"])

    return {"text": text, "entities": entities}
|
|
|
|
|
|
|
def extract_entities(sentence):
    """Run the NER pipeline on ``sentence`` and return a highlight payload.

    Args:
        sentence: The text submitted by the user.

    Returns:
        The ``{"text": ..., "entities": [...]}`` dict built by
        ``prepare_entities_for_highlight``. For empty, whitespace-only or
        ``None`` input the model is not called and the entity list is empty.
    """
    # An empty submission has nothing to tag; skip inference instead of
    # sending a blank string to the model.
    if not sentence or not sentence.strip():
        return {"text": sentence or "", "entities": []}

    results = ner_pipeline(sentence)

    print(f"NER results: {results}")

    return prepare_entities_for_highlight(sentence, results)
|
|
|
|
|
|
|
def ner_app_interface():
    """Assemble the Gradio demo around ``extract_entities`` and launch it.

    The UI has a five-line text box as input and a highlighted-text view as
    output, with one example sentence. The app is launched with
    ``share=True``.
    """
    example_rows = [
        [
            "In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles."
        ]
    ]

    demo = gr.Interface(
        fn=extract_entities,
        inputs=gr.Textbox(
            lines=5,
            label="Input Sentence",
            placeholder="Enter a sentence for NER...",
        ),
        outputs=gr.HighlightedText(label="Extracted Entities"),
        title="Named Entity Recognition",
        description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
        examples=example_rows,
        live=False,
    )

    demo.launch(share=True)
|
|
|
|
|
|
|
# Launch the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    ner_app_interface()
|
|