hassoudi's picture
Update app.py
b9cfeca verified
raw
history blame
4.27 kB
import html
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForTokenClassification, AutoTokenizer
# Module-level cache: filled on the first call to load_healthcare_ner().
model = None
tokenizer = None


def load_healthcare_ner():
    """Return the Healthcare NER model and tokenizer, loading them on first use.

    The Hugging Face access token is read from the ``HF_TOKEN`` environment
    variable (a missing variable raises ``KeyError``) and used both to log in
    and to load the model. The loaded objects are stored in the module-level
    ``model`` and ``tokenizer`` globals and reused by later calls.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    global model, tokenizer

    # Fast path: both objects are already cached.
    if model is not None and tokenizer is not None:
        return model, tokenizer

    hf_token = os.environ["HF_TOKEN"]
    login(token=hf_token)

    repo_id = "TypicaAI/HealthcareNER-Fr"
    model = AutoModelForTokenClassification.from_pretrained(
        repo_id,
        use_auth_token=hf_token,
    )
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    return model, tokenizer
def process_text(text):
    """Run NER on *text* and return it as HTML with entities highlighted.

    Args:
        text: Raw text typed or pasted by the user. ``None`` is treated as an
            empty string.

    Returns:
        An HTML string (a single ``<p>`` element) produced by
        ``highlight_entities``.
    """
    text = text or ""

    if text.strip():
        model, tokenizer = load_healthcare_ner()
        inputs = tokenizer(text, return_tensors="pt", truncation=True)
        # Inference only: skip autograd bookkeeping so no graph or
        # intermediate activations are kept alive for a backward pass.
        with torch.no_grad():
            outputs = model(**inputs)
        # Decode entities from outputs
        entities = extract_entities(outputs, tokenizer, text)
    else:
        # Blank input cannot contain entities; avoid loading/running the model.
        entities = []

    # Highlight entities in the text
    html_output = highlight_entities(text, entities)
    # Log usage
    log_demo_usage(text, len(entities))
    return html_output
def extract_entities(outputs, tokenizer, text, id2label=None):
    """Convert token-level predictions into character-level entity spans.

    The text is re-encoded with offset mappings so that every row of the
    logits (including the special tokens the model saw) is paired with the
    character range it covers. Entity text is sliced from the original
    string, so it is free of sub-word markers and can be located in *text*.

    Args:
        outputs: Model output exposing ``logits`` with a leading batch
            dimension; only the first batch item is read.
        tokenizer: Callable tokenizer that supports
            ``return_offsets_mapping=True``.
            NOTE(review): in transformers this needs a "fast" tokenizer;
            confirm the checkpoint provides one.
        text: The exact string that was fed to the model.
        id2label: Optional mapping from class index to BIO label. Defaults to
            ``model.config.id2label`` of the module-level ``model``.

    Returns:
        A list of dicts with keys ``"entity"`` (label without the ``B-``/``I-``
        prefix), ``"text"``, ``"start"`` and ``"end"`` (character offsets into
        *text*, end exclusive), in order of appearance.
    """
    if id2label is None:
        # Backward-compatible default: label map of the globally loaded model.
        id2label = model.config.id2label

    # Same call shape as in process_text (truncation on, special tokens added)
    # so offsets and predictions line up one-to-one.
    encoding = tokenizer(text, truncation=True, return_offsets_mapping=True)
    offsets = encoding["offset_mapping"]
    predictions = torch.argmax(outputs.logits, dim=-1)[0].tolist()

    spans = []
    current = None
    for (start, end), prediction in zip(offsets, predictions):
        if start == end:
            # Empty span: a special token that covers no input characters.
            continue
        label = id2label[prediction]
        if label.startswith("B-"):
            if current:
                spans.append(current)
            current = {"entity": label[2:], "start": start, "end": end}
        elif label.startswith("I-") and current:
            # Extend the open entity up to the end of this token.
            current["end"] = end
        elif current:
            spans.append(current)
            current = None
    if current:
        spans.append(current)

    entities = []
    for span in spans:
        start, end = span["start"], span["end"]
        # Some tokenizers count the preceding space as part of the token;
        # trim it so the span covers only the entity itself.
        while start < end and text[start].isspace():
            start += 1
        while end > start and text[end - 1].isspace():
            end -= 1
        if start < end:
            entities.append(
                {
                    "entity": span["entity"],
                    "text": text[start:end],
                    "start": start,
                    "end": end,
                }
            )
    return entities
def highlight_entities(text, entities):
    """Return *text* as an HTML paragraph with entity spans wrapped in ``<mark>``.

    The output is assembled from character spans of the original text, and
    every piece of user text is HTML-escaped, so input containing markup is
    displayed literally and already-inserted ``<mark>`` tags are never
    re-processed.

    Args:
        text: The original input string.
        entities: Iterable of dicts. An entity with valid integer ``"start"``
            and ``"end"`` offsets (end exclusive) is highlighted at that
            range; otherwise every occurrence of its ``"text"`` value is
            highlighted. Entities with neither are ignored.

    Returns:
        The HTML string ``<p>...</p>``.
    """
    spans = set()
    for entity in entities:
        start = entity.get("start")
        end = entity.get("end")
        if (
            isinstance(start, int)
            and isinstance(end, int)
            and 0 <= start < end <= len(text)
        ):
            spans.add((start, end))
            continue

        # No usable offsets: fall back to locating the entity text.
        needle = entity.get("text")
        if not needle:
            continue
        position = text.find(needle)
        while position != -1:
            spans.add((position, position + len(needle)))
            position = text.find(needle, position + len(needle))

    pieces = []
    cursor = 0
    for start, end in sorted(spans):
        if start < cursor:
            # Overlaps a span that is already highlighted; skip it.
            continue
        pieces.append(html.escape(text[cursor:start]))
        pieces.append(
            '<mark style="background-color: yellow;">'
            f"{html.escape(text[start:end])}</mark>"
        )
        cursor = end
    pieces.append(html.escape(text[cursor:]))
    return f"<p>{''.join(pieces)}</p>"
def log_demo_usage(text, num_entities):
    """Print a one-line usage record for analytics.

    Args:
        text: The processed input; only its first 50 characters are printed.
        num_entities: Number of entities found in the input.
    """
    preview = text[:50]
    print(f"Processed text: {preview}... | Entities found: {num_entities}")
# Gradio interface: one multi-line textbox in, highlighted HTML out.
demo = gr.Interface(
    title="French Healthcare NER Demo | As featured in 'NLP on OCI'",
    description="""
🔬 Live demo of the French Healthcare NER model built in Chapter 5 of 'NLP on OCI'
📚 Follow along with the book to build this exact model step-by-step
🏥 Perfect for medical text analysis, clinical studies, and healthcare compliance
⚡ Powered by Oracle Cloud Infrastructure
By [Hicham Assoudi] - Oracle Consultant & AI Researcher
""",
    fn=process_text,
    inputs=gr.Textbox(
        label="Paste French medical text",
        placeholder="Le patient présente une hypertension artérielle...",
        lines=5,
    ),
    outputs=gr.HTML(label="Identified Medical Entities"),
    # Sample inputs offered to the user, one row per example.
    examples=[
        ["Le patient souffre d'hypertension et diabète de type 2. Traitement: Metformine 500mg."],
        ["Antécédents: infarctus du myocarde en 2019. Allergie à la pénicilline."],
    ],
)
# Marketing section: book blurb plus an e-mail capture row.
# NOTE(review): only `demo` is launched below; nothing visible in this file
# renders `marketing_elements` or attaches a handler to `submit_btn`, and the
# book link is still the `your-book-link` placeholder. Confirm intent.
with gr.Blocks() as marketing_elements:
    gr.Markdown("""
### 📖 Get the Complete Guide
Learn how to build and deploy this exact model in 'NLP on OCI'
- ✓ Step-by-step implementation
- ✓ Performance optimization
- ✓ Enterprise deployment patterns
- ✓ Complete source code
[Get the Book](your-book-link) | Use code `NERSPACE` for 15% off
""")
    with gr.Row():
        email_input = gr.Textbox(
            label="Get the French Healthcare NER Dataset",
            placeholder="Enter your business email",
        )
        submit_btn = gr.Button("Access Dataset")


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()