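# NOTE(review): the six lines below look like file-viewer page residue
# (uploader, commit message, commit hash, viewer links, file size) rather than
# program text; they are kept verbatim but commented out so the module parses.
# vickeee465
# added ner
# 7a079bf
# raw
# history blame
# 1.45 kB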
import functools
import os

import gradio as gr
import numpy as np
import torch
from huggingface_hub import HfApi
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
# Language names accepted by the demo, in the order they are listed in the UI.
languages = ["English", "Hungarian", "Multilingual"]
def download_models(models=("en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg")):
    """Download every spaCy model named in *models*.

    Model names starting with ``"hu"`` are fetched with ``huspacy.download``;
    all other names are fetched with ``spacy.cli.download``.

    Args:
        models: Iterable of model names. The default is an immutable tuple so
            the default value cannot be modified between calls.

    Returns:
        None. The function is called for its download side effect only.
    """
    # NOTE(review): `spacy` and `huspacy` are not imported in the visible part
    # of this file -- confirm they are brought into scope before this runs.
    for model in models:
        if model.startswith("hu"):
            # Presumably the Hungarian models are distributed through huspacy
            # rather than spaCy's own downloader -- TODO confirm.
            huspacy.download(model)
        else:
            # The original passed the undefined name `model_name` here, which
            # raised NameError on the first iteration.
            spacy.cli.download(model)
def build_spacy_path(language: str) -> str:
    """Return the spaCy model id to use for *language*.

    The comparison is case-insensitive. ``"english"`` maps to
    ``"en_core_web_lg"`` and ``"hungarian"`` maps to ``"hu_core_news_lg"``;
    every other value falls back to the multilingual ``"xx_ent_wiki_sm"``.

    Args:
        language: Language name. A non-string value (for example ``None``) is
            treated like any other unrecognised language instead of raising
            ``AttributeError`` on ``.lower()``.

    Returns:
        The model id as a string.
    """
    models_by_language = {
        "english": "en_core_web_lg",
        "hungarian": "hu_core_news_lg",
    }
    # NOTE(review): a dropdown with nothing selected may hand over None,
    # depending on the gradio version -- confirm against the deployed version.
    key = language.lower() if isinstance(language, str) else ""
    return models_by_language.get(key, "xx_ent_wiki_sm")
@functools.lru_cache(maxsize=None)
def _load_pipeline(model_id):
    """Load the spaCy pipeline for *model_id*, reusing it on later calls."""
    # Unbounded cache is acceptable here: the only caller passes ids produced
    # by build_spacy_path, which returns one of three fixed strings. The
    # trade-off is that every model used so far stays in memory.
    # NOTE(review): `spacy` is not imported in the visible part of this file --
    # confirm it is brought into scope before this runs.
    return spacy.load(model_id)


def named_entity_recognition(text, language):
    """Run named-entity recognition on *text* with the model for *language*.

    Args:
        text: Text to analyse.
        language: Language name; mapped to a model id by ``build_spacy_path``.

    Returns:
        A ``(entities, output_info)`` tuple. ``entities`` is a list of
        ``(entity_text, entity_label)`` tuples, one per item in ``doc.ents``.
        ``output_info`` is an HTML paragraph naming the model and linking to
        it on huggingface.co.
    """
    model_id = build_spacy_path(language)
    # The original called spacy.load on every request; the cached helper loads
    # each model only the first time it is needed.
    doc = _load_pipeline(model_id)(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]

    # Model ids starting with "hu" are linked under the "huspacy" namespace,
    # everything else under "spacy".
    model_id_hf = f"huspacy/{model_id}" if model_id.startswith("hu") else f"spacy/{model_id}"
    output_info = f'<p style="text-align: center; display: block">Prediction was made using the <a href="https://huggingface.co/{model_id_hf}">{model_id_hf}</a> model.</p>'
    return entities, output_info
# Gradio interface around named_entity_recognition: a six-line text box and a
# language dropdown go in; the entity list is shown as highlighted text and the
# model-attribution snippet is rendered by the Markdown component.
demo = gr.Interface(
    fn=named_entity_recognition,
    inputs=[
        gr.Textbox(lines=6, label="Input"),
        gr.Dropdown(languages, label="Language"),
    ],
    outputs=[
        gr.HighlightedText(label="Output"),
        gr.Markdown(),
    ],
)