Spaces:

poltextlab
/

babel_machine

Running

vickeee465

added ner

7a079bf over 1 year ago

1.45 kB

	import gradio as gr

	import os
	import torch
	import numpy as np
	from transformers import AutoModelForSequenceClassification
	from transformers import AutoTokenizer
	from huggingface_hub import HfApi

	# Language names offered to the user; each one is mapped to a spaCy
	# pipeline by build_spacy_path().
	languages = ["English", "Hungarian", "Multilingual"]

	def download_models(models=("en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg")):
	    """Download every spaCy pipeline package named in *models*.

	    Names starting with ``"hu"`` are fetched with ``huspacy.download``;
	    every other name is fetched with ``spacy.cli.download``.

	    Args:
	        models: Iterable of pipeline package names. The default is an
	            immutable tuple so it cannot be mutated between calls.
	    """
	    # Imported locally because the module's top-level imports do not bring
	    # spacy / huspacy into scope.
	    import huspacy
	    import spacy.cli

	    for model in models:
	        if model.startswith("hu"):
	            huspacy.download(model)
	        else:
	            # The original referenced an undefined name (model_name) here
	            # and ran this call for "hu" models as well.
	            spacy.cli.download(model)

	def build_spacy_path(language: str):
	    """Return the spaCy pipeline name for *language* (case-insensitive).

	    ``"english"`` maps to ``en_core_web_lg`` and ``"hungarian"`` to
	    ``hu_core_news_lg``; any other value falls back to the multilingual
	    ``xx_ent_wiki_sm`` pipeline.
	    """
	    pipeline_by_language = {
	        "english": "en_core_web_lg",
	        "hungarian": "hu_core_news_lg",
	    }
	    return pipeline_by_language.get(language.lower(), "xx_ent_wiki_sm")

	def named_entity_recognition(text, language):
	    """Run spaCy named-entity recognition on *text*.

	    Args:
	        text: Input text to analyse.
	        language: Language name; mapped to a spaCy pipeline name by
	            ``build_spacy_path``.

	    Returns:
	        A ``(entities, output_info)`` pair. ``entities`` is a list of
	        ``(entity_text, entity_label)`` tuples taken from ``doc.ents``;
	        ``output_info`` is an HTML snippet linking to the Hugging Face
	        page of the model that produced the prediction.
	    """
	    # Imported locally because the module's top-level imports do not bring
	    # spacy into scope; without this the call below raises NameError.
	    import spacy

	    model_id = build_spacy_path(language)
	    pipeline = spacy.load(model_id)
	    doc = pipeline(text)
	    entities = [(ent.text, ent.label_) for ent in doc.ents]
	    # "hu" pipelines are linked under the huspacy/ namespace, all others
	    # under spacy/.
	    model_id_hf = f"huspacy/{model_id}" if model_id.startswith("hu") else f"spacy/{model_id}"
	    output_info = f'<p style="text-align: center; display: block">Prediction was made using the <a href="https://huggingface.co/{model_id_hf}">{model_id_hf}</a> model.</p>'
	    return entities, output_info

	# Gradio interface: a text box and a language dropdown feed
	# named_entity_recognition(); its two return values are rendered as
	# highlighted text and a Markdown note.
	demo = gr.Interface(
	    fn=named_entity_recognition,
	    inputs=[
	        gr.Textbox(lines=6, label="Input"),
	        gr.Dropdown(languages, label="Language"),
	    ],
	    outputs=[
	        gr.HighlightedText(label='Output'),
	        gr.Markdown(),
	    ],
	)