Spaces:
Running
Running
File size: 1,987 Bytes
7a079bf e61173e af36483 7a079bf fa1f65b f9f8557 7a079bf 7ce342c 9cfccba 7a079bf dfd4863 7a079bf 12eab55 aa9d9e1 a39a7f9 12eab55 7a079bf fa1f65b 12eab55 7a079bf f7e1e22 7a079bf f7e1e22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import gradio as gr
import os
import torch
import numpy as np
import spacy
import huspacy
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from huggingface_hub import HfApi
from spacy.glossary import GLOSSARY as NER_DICT
# Language choices shown in the UI dropdown. build_spacy_path matches
# "English" and "Hungarian" explicitly; anything else gets the multilingual
# model.
languages = [
    "English",
    "Hungarian",
    "Multilingual",
]
def download_models(models=("en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg")):
    """Download the spaCy / HuSpaCy models used by the demo.

    Args:
        models: Iterable of model names. Names starting with ``"hu"`` are
            fetched through ``huspacy.download``; every other name is
            fetched through ``spacy.cli.download``. The default is an
            immutable tuple so it cannot be altered between calls.
    """
    for model in models:
        if model.startswith("hu"):
            # Forward the requested name so a non-default Hungarian model is
            # not silently replaced by HuSpaCy's default one.
            huspacy.download(model)
        else:
            spacy.cli.download(model)
def build_spacy_path(language: str):
    """Return the name of the spaCy model to use for ``language``.

    Args:
        language: Display name of the language; matching is
            case-insensitive. ``None`` or an empty string (for example when
            no value has been chosen yet) is accepted and treated like any
            other unrecognised language.

    Returns:
        ``"en_core_web_lg"`` for English, ``"hu_core_news_lg"`` for
        Hungarian, and the multilingual ``"xx_ent_wiki_sm"`` otherwise.
    """
    models_by_language = {
        "english": "en_core_web_lg",
        "hungarian": "hu_core_news_lg",
    }
    # ``language or ""`` keeps a missing selection from raising
    # AttributeError on ``.lower()``; it falls through to the default model.
    return models_by_language.get((language or "").lower(), "xx_ent_wiki_sm")
# Loaded pipelines keyed by model name, so each model is loaded once instead
# of on every request.
_PIPELINE_CACHE = {}


def _load_pipeline(model_id):
    """Return the pipeline for ``model_id``, loading it on first use."""
    if model_id not in _PIPELINE_CACHE:
        _PIPELINE_CACHE[model_id] = (
            huspacy.load() if model_id.startswith("hu") else spacy.load(model_id)
        )
    return _PIPELINE_CACHE[model_id]


def named_entity_recognition(text, language):
    """Run named-entity recognition on ``text`` and describe the result.

    Args:
        text: The text to analyse.
        language: Language name; passed to ``build_spacy_path`` to pick the
            model.

    Returns:
        A ``(output, output_info)`` tuple. ``output`` is a dict with the
        original ``"text"`` and a list of ``"entities"``, each holding the
        entity label plus its start and end character offsets.
        ``output_info`` is an HTML string with a legend explaining every
        label that occurred and a link to the model on the Hugging Face Hub.
    """
    model_id = build_spacy_path(language)
    doc = _load_pipeline(model_id)(text)

    entities = [
        {"entity": ent.label_, "start": ent.start_char, "end": ent.end_char}
        for ent in doc.ents
    ]

    # dict.fromkeys de-duplicates while keeping first-appearance order, so
    # the legend is stable between runs (iterating a set is not).
    labels_used = dict.fromkeys(ent.label_ for ent in doc.ents)
    # Fall back to the raw label when the glossary has no description for it
    # rather than failing the whole request with a KeyError.
    legend_items = "".join(
        f"<li> <b>{label}</b> = <i>{NER_DICT.get(label, label)}</i> </li>"
        for label in labels_used
    )
    legend = (
        '<p style="text-align: left; display: block">Legend:</p>'
        '<ul style="text-align: left; display: block">' + legend_items + "</ul>"
    )

    output = {"text": text, "entities": entities}

    # Hungarian models are published under the "huspacy" organisation on the
    # Hub, all others under "spacy".
    model_id_hf = f"huspacy/{model_id}" if model_id.startswith("hu") else f"spacy/{model_id}"
    # The original markup ended with a stray, never-closed "<ul>"; it is
    # dropped here so the emitted HTML is well-formed.
    output_info = legend + (
        '<p style="text-align: center; display: block">Prediction was made using the '
        f'<a href="https://huggingface.co/{model_id_hf}">{model_id_hf}</a> model.</p>'
    )
    return output, output_info
# Gradio UI: a free-text box and a language dropdown feed
# named_entity_recognition; its two return values fill the highlighted-text
# view and the Markdown panel, in that order.
demo = gr.Interface(
    fn=named_entity_recognition,
    title="NER Babel Demo",
    inputs=[
        gr.Textbox(label="Input", lines=6),
        gr.Dropdown(languages, label="Language"),
    ],
    outputs=[
        gr.HighlightedText(label="Output"),
        gr.Markdown(),
    ],
)
|