# Hugging Face Space application file (Space status when captured: Sleeping; file size: 4,272 bytes).
import html
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForTokenClassification, AutoTokenizer
# Module-level cache slots; both stay None until the model is first loaded.
model = tokenizer = None
def load_healthcare_ner():
    """Load the Healthcare NER model and tokenizer, caching them in module globals.

    The first call downloads/loads ``TypicaAI/HealthcareNER-Fr``; later calls
    return the cached objects. The Hugging Face access token is read from the
    ``HF_TOKEN`` environment variable when it is set.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    global model, tokenizer
    if model is None or tokenizer is None:
        repo_id = "TypicaAI/HealthcareNER-Fr"
        # Tolerate a missing/empty variable instead of failing with a bare
        # KeyError: the hub call itself reports an authentication problem if
        # the repository really requires a token.
        hf_token = os.environ.get("HF_TOKEN") or None
        if hf_token:
            login(token=hf_token)
        # ``token`` replaces the deprecated ``use_auth_token`` keyword.
        loaded_model = AutoModelForTokenClassification.from_pretrained(
            repo_id,
            token=hf_token,
        )
        loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id, token=hf_token)
        # Publish both together so a failed tokenizer load never leaves a
        # half-initialised cache behind.
        model, tokenizer = loaded_model, loaded_tokenizer
    return model, tokenizer
def process_text(text):
    """Run NER on ``text`` and return it as HTML with entities highlighted.

    Args:
        text: Raw text entered by the user. ``None`` is treated as an empty
            string.

    Returns:
        The HTML string produced by ``highlight_entities``.
    """
    text = text or ""
    model, tokenizer = load_healthcare_ner()
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    # Decode entities from the model outputs.
    entities = extract_entities(outputs, tokenizer, text)
    # Highlight the entities in the text.
    html_output = highlight_entities(text, entities)
    # Log usage.
    log_demo_usage(text, len(entities))
    return html_output
def extract_entities(outputs, tokenizer, text):
    """Extract entity spans from token-classification outputs.

    The text is re-encoded with the same ``truncation=True`` setting used to
    produce ``outputs`` so that every logit row lines up with one token,
    special tokens included. Entities are reported by character offsets into
    ``text`` rather than by re-joined sub-word pieces, so ``entity["text"]``
    is always an exact substring of ``text``.

    Args:
        outputs: Model output exposing ``logits``; the first batch row is used.
        tokenizer: Tokenizer callable that supports
            ``return_offsets_mapping=True`` (assumed to be a "fast" tokenizer;
            slow tokenizers do not provide offsets).
        text: The string that was fed to the model.

    Returns:
        A list of dicts with keys ``entity`` (label without the ``B-``/``I-``
        prefix), ``text``, ``start`` and ``end`` (character offsets).
    """
    encoding = tokenizer(text, truncation=True, return_offsets_mapping=True)
    offsets = encoding["offset_mapping"]
    # Index the batch row instead of squeeze(): squeeze() would collapse a
    # one-token sequence to a scalar and break the iteration below.
    predictions = torch.argmax(outputs.logits, dim=-1)[0].tolist()
    # Label names come from the module-level model populated by the loader.
    id2label = model.config.id2label

    spans = []
    current = None
    for (start, end), prediction in zip(offsets, predictions):
        if start == end:
            # Zero-width offsets carry no text (typically special tokens).
            continue
        label = id2label[prediction]
        if label.startswith("B-"):
            if current:
                spans.append(current)
            current = {"entity": label[2:], "start": start, "end": end}
        elif label.startswith("I-") and current:
            current["end"] = end
        elif current:
            spans.append(current)
            current = None
    if current:
        spans.append(current)

    entities = []
    for span in spans:
        start, end = span["start"], span["end"]
        # Some tokenizers include the preceding space in a token's offsets.
        while start < end and text[start].isspace():
            start += 1
        while end > start and text[end - 1].isspace():
            end -= 1
        if start == end:
            continue
        entities.append(
            {
                "entity": span["entity"],
                "text": text[start:end],
                "start": start,
                "end": end,
            }
        )
    return entities
def highlight_entities(text, entities):
    """Return ``text`` as an HTML paragraph with entity spans wrapped in ``<mark>``.

    All text is HTML-escaped, so markup typed by the user is displayed rather
    than interpreted. Spans are resolved against the original text first and
    the output is assembled in a single pass, so one highlight can never
    rewrite the tags produced by another.

    Args:
        text: The original input string.
        entities: Iterable of dicts. An entity with valid integer ``start`` and
            ``end`` offsets is highlighted at that position only; otherwise
            every occurrence of its ``text`` value is highlighted.

    Returns:
        An HTML string of the form ``<p>...</p>``.
    """
    spans = []
    for entity in entities:
        fragment = entity.get("text") or ""
        start, end = entity.get("start"), entity.get("end")
        offsets_usable = (
            isinstance(start, int)
            and isinstance(end, int)
            and 0 <= start < end <= len(text)
            and (not fragment or text[start:end] == fragment)
        )
        if offsets_usable:
            spans.append((start, end))
        elif fragment:
            # No trustworthy offsets: fall back to marking every occurrence.
            position = text.find(fragment)
            while position != -1:
                spans.append((position, position + len(fragment)))
                position = text.find(fragment, position + len(fragment))

    pieces = []
    cursor = 0
    # Earliest span first; on a tie the longer span wins. Anything that
    # overlaps an already emitted span is dropped.
    for start, end in sorted(set(spans), key=lambda span: (span[0], -span[1])):
        if start < cursor:
            continue
        pieces.append(html.escape(text[cursor:start]))
        pieces.append(
            f'<mark style="background-color: yellow;">{html.escape(text[start:end])}</mark>'
        )
        cursor = end
    pieces.append(html.escape(text[cursor:]))
    return f"<p>{''.join(pieces)}</p>"
def log_demo_usage(text, num_entities):
    """Print a one-line usage record for analytics.

    Args:
        text: The processed input; only its first 50 characters are printed.
        num_entities: Number of entities found in ``text``.
    """
    preview = text[:50]
    record = f"Processed text: {preview}... | Entities found: {num_entities}"
    print(record)
# Gradio interface: a multi-line textbox goes in, process_text's HTML comes out.
demo = gr.Interface(
    title="French Healthcare NER Demo | As featured in 'NLP on OCI'",
    description="""
🔬 Live demo of the French Healthcare NER model built in Chapter 5 of 'NLP on OCI'
📚 Follow along with the book to build this exact model step-by-step
🏥 Perfect for medical text analysis, clinical studies, and healthcare compliance
⚡ Powered by Oracle Cloud Infrastructure
By [Hicham Assoudi] - Oracle Consultant & AI Researcher
""",
    fn=process_text,
    inputs=gr.Textbox(
        lines=5,
        label="Paste French medical text",
        placeholder="Le patient présente une hypertension artérielle...",
    ),
    outputs=gr.HTML(label="Identified Medical Entities"),
    # Each inner list is one example row holding the single textbox value.
    examples=[
        ["Le patient souffre d'hypertension et diabète de type 2. Traitement: Metformine 500mg."],
        ["Antécédents: infarctus du myocarde en 2019. Allergie à la pénicilline."],
    ],
)
# Promotional panel: a book blurb followed by a dataset sign-up row.
# NOTE(review): in the visible source nothing renders this Blocks or attaches
# a click handler to submit_btn; confirm whether it is meant to be shown.
with gr.Blocks() as marketing_elements:
    gr.Markdown(
        value="""
### 📖 Get the Complete Guide
Learn how to build and deploy this exact model in 'NLP on OCI'
- ✓ Step-by-step implementation
- ✓ Performance optimization
- ✓ Enterprise deployment patterns
- ✓ Complete source code
[Get the Book](your-book-link) | Use code `NERSPACE` for 15% off
"""
    )
    with gr.Row():
        email_input = gr.Textbox(
            placeholder="Enter your business email",
            label="Get the French Healthcare NER Dataset",
        )
        submit_btn = gr.Button(value="Access Dataset")
# Launch the Gradio demo only when this file is executed as a script, so that
# importing the module does not start a server.
if __name__ == "__main__":
    demo.launch()
|