|
import gradio as gr |
|
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer |
|
|
|
|
|
# Hugging Face Hub identifier of the NER model served by this demo.
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"

# The tokenizer is loaded explicitly so the very same instance is handed to
# the pipeline below.
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# NOTE: trust_remote_code=True allows transformers to execute Python code
# downloaded from the model repository; keep it only for repositories you
# trust. "generic-ner" is presumably a custom task shipped with that
# repository -- confirm against the model card.
ner_pipeline = pipeline(
    task="generic-ner",
    model=MODEL_NAME,
    tokenizer=ner_tokenizer,
    device="cpu",  # inference is pinned to the CPU
    trust_remote_code=True,
)
|
|
|
|
|
|
|
def prepare_entities_for_highlight(text, results):
    """Convert NER results into the payload expected by ``gr.HighlightedText``.

    Args:
        text: The input string that the entity offsets refer to.
        results: Mapping whose values are lists of entity dicts. Each entity
            dict must provide the keys ``"start"``, ``"end"``, ``"entity"``
            and ``"score"``. The mapping's keys (categories) are not used.

    Returns:
        A dict ``{"text": text, "entities": [...]}`` where every item has
        ``"start"``, ``"end"``, ``"label"`` and ``"entity"``. Both ``"label"``
        and ``"entity"`` hold the same display string, e.g. ``"pers (98.50%)"``.

    Raises:
        KeyError: If an entity dict lacks one of the required keys.
    """
    entities = []
    # Only the per-category entity lists matter; the category names are unused.
    for entity_list in results.values():
        for entity in entity_list:
            # The score is printed as-is with a trailing "%"; this assumes the
            # pipeline already reports a percentage -- TODO confirm.
            label = f"{entity['entity']} ({entity['score']:.2f}%)"
            entities.append(
                {
                    "start": entity["start"],
                    "end": entity["end"],
                    # Kept for backward compatibility with existing consumers.
                    "label": label,
                    # gr.HighlightedText reads the category from "entity"
                    # (or "entity_group"); without it spans show no label.
                    "entity": label,
                }
            )
    return {"text": text, "entities": entities}
|
|
|
|
|
|
|
def extract_entities(sentence):
    """Run the NER pipeline on *sentence* and format it for highlighting.

    Args:
        sentence: Text entered by the user. May be empty or ``None`` when the
            form is submitted without input.

    Returns:
        The dict produced by ``prepare_entities_for_highlight``; for blank
        input, ``{"text": <input or "">, "entities": []}`` without calling
        the model.
    """
    # Blank input cannot contain entities, so skip inference entirely instead
    # of sending an empty string to the model.
    if not sentence or not sentence.strip():
        return {"text": sentence or "", "entities": []}

    results = ner_pipeline(sentence)
    return prepare_entities_for_highlight(sentence, results)
|
|
|
|
|
|
|
def ner_app_interface(share=True):
    """Build the Gradio NER demo and launch it.

    Args:
        share: Forwarded to ``Interface.launch``. Defaults to ``True`` so that
            calling the function without arguments behaves as before; pass
            ``False`` to skip requesting a public share link.
    """
    input_sentence = gr.Textbox(
        lines=5, label="Input Sentence", placeholder="Enter a sentence for NER..."
    )
    output_entities = gr.HighlightedText(label="Extracted Entities")

    interface = gr.Interface(
        fn=extract_entities,
        inputs=input_sentence,
        outputs=output_entities,
        title="Named Entity Recognition",
        description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
        examples=[
            [
                "In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles."
            ]
        ],
        # Run the model only on explicit submit, not on every keystroke.
        live=False,
    )

    interface.launch(share=share)
|
|
|
|
|
|
|
# Start the demo only when this file is executed as a script, so importing
# the module does not launch the web server.
if __name__ == "__main__":
    ner_app_interface()
|
|