Spaces:

TypicaAI
/

HealthcareNER-Fr

Running

App Files Files Community

HealthcareNER-Fr / app.py

hassoudi

Update app.py

b9cfeca verified 3 months ago

raw

history blame

4.27 kB

	import gradio as gr
	from huggingface_hub import login
	from transformers import AutoModelForTokenClassification, AutoTokenizer
	import os
	import torch

	# Module-level cache for the NER model and its tokenizer; both remain None
	# until load_healthcare_ner() fills them in.
	model = tokenizer = None

	def load_healthcare_ner():
	    """Load the Healthcare NER model and tokenizer, caching them in module globals.

	    On the first call this logs in to the Hugging Face Hub with the
	    ``HF_TOKEN`` environment variable and loads ``TypicaAI/HealthcareNER-Fr``.
	    Later calls return the cached objects without touching the Hub.

	    Returns:
	        tuple: ``(model, tokenizer)``.

	    Raises:
	        KeyError: If the ``HF_TOKEN`` environment variable is not set.
	    """
	    global model, tokenizer
	    if model is None or tokenizer is None:
	        repo_id = "TypicaAI/HealthcareNER-Fr"
	        hf_token = os.environ["HF_TOKEN"]
	        login(token=hf_token)
	        # `token=` replaces the deprecated `use_auth_token=` keyword; it is
	        # passed to both loaders so they authenticate the same way.
	        loaded_model = AutoModelForTokenClassification.from_pretrained(
	            repo_id,
	            token=hf_token,
	        )
	        loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id, token=hf_token)
	        # Publish both globals together so a failed tokenizer download does
	        # not leave a half-initialised cache behind.
	        model, tokenizer = loaded_model, loaded_tokenizer
	    return model, tokenizer

	def process_text(text):
	    """Run NER over *text* and return it as HTML with the entities highlighted.

	    Args:
	        text: Input string submitted through the UI.

	    Returns:
	        str: HTML built by ``highlight_entities``. Empty or whitespace-only
	        input (or ``None``) short-circuits to ``"<p></p>"`` without loading
	        or running the model.
	    """
	    # Nothing to analyse: skip model loading, inference and usage logging.
	    if not text or not text.strip():
	        return "<p></p>"

	    model, tokenizer = load_healthcare_ner()
	    inputs = tokenizer(text, return_tensors="pt", truncation=True)
	    # Inference only: no_grad() avoids building the autograd graph.
	    with torch.no_grad():
	        outputs = model(**inputs)

	    # Decode entities from the model outputs.
	    entities = extract_entities(outputs, tokenizer, text)

	    # Highlight entities in the text.
	    html_output = highlight_entities(text, entities)

	    # Log usage.
	    log_demo_usage(text, len(entities))

	    return html_output

	def extract_entities(outputs, tokenizer, text):
	    """Group per-token B-/I- label predictions into entity spans of *text*.

	    Args:
	        outputs: Model output exposing ``logits``; the first sequence of the
	            batch is decoded (``process_text`` submits a single text).
	        tokenizer: The tokenizer that was used to encode *text* for the model.
	        text: The original input string.

	    Returns:
	        list[dict]: One dict per entity with keys ``"entity"`` (label without
	        its ``B-``/``I-`` prefix), ``"text"`` (the matching slice of *text*),
	        and ``"start"``/``"end"`` character offsets into *text*. With a
	        tokenizer that cannot report offsets, the text is rebuilt from the
	        tokens and the offsets are ``-1`` when it cannot be located in *text*.
	    """
	    # Index the batch dimension instead of squeeze() so a length-1 sequence
	    # still yields a list rather than a bare int.
	    predictions = torch.argmax(outputs.logits, dim=2)[0].tolist()
	    # The label map is read from the module-level model.
	    id2label = model.config.id2label

	    # Re-encode with special tokens (as process_text does) so position i here
	    # lines up with prediction i; tokenizer.tokenize() omits special tokens
	    # and would shift every label by one.
	    has_offsets = bool(getattr(tokenizer, "is_fast", False))
	    encoding = tokenizer(
	        text,
	        truncation=True,
	        return_special_tokens_mask=True,
	        return_offsets_mapping=has_offsets,
	    )
	    special_mask = encoding["special_tokens_mask"]
	    offsets = encoding["offset_mapping"] if has_offsets else None
	    tokens = tokenizer.convert_ids_to_tokens(encoding["input_ids"])

	    # Pass 1: collect [label, first_index, last_index] token spans.
	    spans = []
	    current = None
	    for index, (is_special, prediction) in enumerate(zip(special_mask, predictions)):
	        label = "O" if is_special else id2label[prediction]
	        if label.startswith("B-"):
	            current = [label[2:], index, index]
	            spans.append(current)
	        elif label.startswith("I-") and current is not None:
	            current[2] = index
	        else:
	            # Any other label (or an I- with nothing open) ends the entity.
	            current = None

	    # Pass 2: map token spans back to the original text.
	    entities = []
	    search_from = 0
	    for label, first, last in spans:
	        if offsets is not None:
	            start = offsets[first][0]
	            raw = text[start:offsets[last][1]]
	            # Some tokenizers include the preceding space in the offsets.
	            start += len(raw) - len(raw.lstrip())
	            entity_text = raw.strip()
	        else:
	            entity_text = tokenizer.convert_tokens_to_string(
	                tokens[first:last + 1]
	            ).strip()
	            start = text.find(entity_text, search_from) if entity_text else -1
	            if start >= 0:
	                search_from = start + len(entity_text)
	        if not entity_text:
	            continue
	        end = start + len(entity_text) if start >= 0 else -1
	        entities.append(
	            {"entity": label, "text": entity_text, "start": start, "end": end}
	        )
	    return entities

	def highlight_entities(text, entities):
	    """Return *text* as an HTML paragraph with entity mentions wrapped in ``<mark>``.

	    Args:
	        text: The original input string.
	        entities: Iterable of dicts; the ``"text"`` value of each is the
	            substring to highlight. Empty or missing values are ignored.

	    Returns:
	        str: ``<p>...</p>`` in which every occurrence of an entity text is
	        wrapped exactly once in a yellow ``<mark>``, and all text coming from
	        the input is HTML-escaped (``&``, ``<``, ``>``).
	    """
	    import html
	    import re

	    needles = {entity.get("text") for entity in entities}
	    needles.discard(None)
	    needles.discard("")
	    if not needles:
	        return f"<p>{html.escape(text, quote=False)}</p>"

	    # One left-to-right pass over the raw text: unlike chained str.replace it
	    # never re-scans inserted markup, so duplicates cannot nest and entity
	    # texts such as "style" cannot corrupt the tags. Longest alternatives
	    # come first so a longer mention wins over one of its prefixes.
	    ordered = sorted(needles, key=lambda needle: (-len(needle), needle))
	    pattern = re.compile("|".join(re.escape(needle) for needle in ordered))

	    pieces = []
	    cursor = 0
	    for match in pattern.finditer(text):
	        pieces.append(html.escape(text[cursor:match.start()], quote=False))
	        mention = html.escape(match.group(0), quote=False)
	        pieces.append(f'<mark style="background-color: yellow;">{mention}</mark>')
	        cursor = match.end()
	    pieces.append(html.escape(text[cursor:], quote=False))
	    return f"<p>{''.join(pieces)}</p>"

	def log_demo_usage(text, num_entities):
	    """Print a one-line usage record for analytics.

	    Args:
	        text: The processed input; only its first 50 characters are printed.
	        num_entities: Number of entities found in *text*.
	    """
	    # Plain "|" separator: a backslash before the pipe is an invalid escape
	    # sequence in Python and would print a stray backslash.
	    print(f"Processed text: {text[:50]}... | Entities found: {num_entities}")

	# Gradio interface: a multi-line textbox feeds process_text and the returned
	# HTML is rendered as-is. The title uses a plain "|" because a backslash
	# before the pipe is an invalid Python escape and would be shown literally.
	demo = gr.Interface(
	    fn=process_text,
	    inputs=gr.Textbox(
	        label="Paste French medical text",
	        placeholder="Le patient présente une hypertension artérielle...",
	        lines=5,
	    ),
	    outputs=gr.HTML(label="Identified Medical Entities"),
	    title="French Healthcare NER Demo | As featured in 'NLP on OCI'",
	    description="""
	🔬 Live demo of the French Healthcare NER model built in Chapter 5 of 'NLP on OCI'

	📚 Follow along with the book to build this exact model step-by-step
	🏥 Perfect for medical text analysis, clinical studies, and healthcare compliance
	⚡ Powered by Oracle Cloud Infrastructure

	By [Hicham Assoudi] - Oracle Consultant & AI Researcher
	""",
	    # Each example is a one-element list: one value per input component.
	    examples=[
	        ["Le patient souffre d'hypertension et diabète de type 2. Traitement: Metformine 500mg."],
	        ["Antécédents: infarctus du myocarde en 2019. Allergie à la pénicilline."],
	    ],
	)

	# Marketing panel: a Markdown blurb about the book plus an e-mail capture row.
	# The Markdown uses a plain "|" because a backslash before the pipe is an
	# invalid escape sequence in a non-raw Python string.
	# NOTE(review): nothing visible in this file renders `marketing_elements` or
	# attaches a click handler to `submit_btn`, and the book link is still the
	# placeholder "your-book-link" - confirm whether this panel is meant to ship.
	with gr.Blocks() as marketing_elements:
	    gr.Markdown("""
	### 📖 Get the Complete Guide

	Learn how to build and deploy this exact model in 'NLP on OCI'
	- ✓ Step-by-step implementation
	- ✓ Performance optimization
	- ✓ Enterprise deployment patterns
	- ✓ Complete source code

	[Get the Book](your-book-link) | Use code `NERSPACE` for 15% off
	""")

	    with gr.Row():
	        email_input = gr.Textbox(
	            label="Get the French Healthcare NER Dataset",
	            placeholder="Enter your business email",
	        )
	        submit_btn = gr.Button("Access Dataset")

	# Script entry point: start the Gradio server only when this file is run
	# directly, not when it is imported.
	if __name__ == "__main__":
	    demo.launch()