File size: 4,272 Bytes
a350303
f33c0ca
b9cfeca
f33c0ca
b9cfeca
 
 
 
 
a350303
f33c0ca
b9cfeca
 
 
 
 
 
 
 
 
 
f33c0ca
 
b9cfeca
 
 
 
 
 
 
 
 
f33c0ca
b9cfeca
 
f33c0ca
b9cfeca
f33c0ca
a350303
b9cfeca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d5574c
 
 
f33c0ca
 
 
9d5574c
f33c0ca
 
9d5574c
f33c0ca
9d5574c
f33c0ca
 
 
9d5574c
f33c0ca
9d5574c
f33c0ca
 
 
 
 
 
b9cfeca
f33c0ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9cfeca
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
from huggingface_hub import login
from transformers import AutoModelForTokenClassification, AutoTokenizer
import os
import torch

# Module-level cache for the NER model and its tokenizer. Both start as None
# and are assigned by load_healthcare_ner() the first time it runs.
# extract_entities() also reads `model` directly for its label mapping.
model = None
tokenizer = None

def load_healthcare_ner():
    """Return the cached ``(model, tokenizer)`` pair, loading it on first use.

    The Hugging Face token is read from the ``HF_TOKEN`` environment
    variable; a missing variable raises ``KeyError``. The loaded objects are
    stored in the module-level ``model`` / ``tokenizer`` globals so later
    calls return immediately.
    """
    global model, tokenizer

    # Fast path: both halves of the pair are already cached.
    if model is not None and tokenizer is not None:
        return model, tokenizer

    repo_id = "TypicaAI/HealthcareNER-Fr"
    hf_token = os.environ["HF_TOKEN"]

    login(token=hf_token)
    model = AutoModelForTokenClassification.from_pretrained(
        repo_id,
        use_auth_token=hf_token,
    )
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    return model, tokenizer

def process_text(text):
    """Run NER over *text* and return it as HTML with entities highlighted.

    Args:
        text: Raw text typed or pasted by the user. ``None`` is treated as
            an empty string.

    Returns:
        The HTML string produced by ``highlight_entities``.
    """
    text = text or ""

    # Nothing to analyse: skip model loading and inference entirely, but
    # keep the same output shape and usage log as a zero-entity result.
    if not text.strip():
        html_output = highlight_entities(text, [])
        log_demo_usage(text, 0)
        return html_output

    model, tokenizer = load_healthcare_ner()
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Pure inference: disable autograd so no gradient state is recorded.
    with torch.no_grad():
        outputs = model(**inputs)

    # Decode entities from outputs
    entities = extract_entities(outputs, tokenizer, text)

    # Highlight entities in the text
    html_output = highlight_entities(text, entities)

    # Log usage
    log_demo_usage(text, len(entities))

    return html_output

def extract_entities(outputs, tokenizer, text, id2label=None):
    """Group per-token BIO predictions into entity spans.

    The text is re-encoded the same way the model input was built (special
    tokens included, truncation on) so that position ``i`` of the logits is
    paired with position ``i`` of the encoding. Special tokens are skipped.

    Args:
        outputs: Model output exposing ``logits`` with a leading batch
            dimension of size 1.
        tokenizer: The tokenizer used to build the model input.
        text: The original input string.
        id2label: Optional mapping from class id to BIO label. Defaults to
            the module-level ``model.config.id2label``.

    Returns:
        A list of dicts with keys ``entity`` (label without the ``B-``/``I-``
        prefix), ``text`` (surface form), ``start`` and ``end`` (character
        offsets into *text*). When the tokenizer cannot report offsets and
        the surface form is not found in *text*, ``start``/``end`` are -1.
    """
    if id2label is None:
        id2label = model.config.id2label

    # Index the batch dimension rather than squeeze(): squeeze() would
    # collapse a length-1 sequence into a scalar.
    predictions = torch.argmax(outputs.logits, dim=-1)[0].tolist()

    # Only fast tokenizers can return character offsets.
    has_offsets = bool(getattr(tokenizer, "is_fast", False))
    encoding = tokenizer(
        text, truncation=True, return_offsets_mapping=has_offsets
    )
    input_ids = encoding["input_ids"]
    offsets = encoding["offset_mapping"] if has_offsets else None
    special_ids = set(tokenizer.all_special_ids)

    # Pass 1: BIO grouping over token positions.
    groups = []
    open_group = None
    for position, (token_id, prediction) in enumerate(
        zip(input_ids, predictions)
    ):
        if token_id in special_ids:
            continue
        label = id2label[prediction]
        if label.startswith("B-"):
            open_group = (label[2:], [position])
            groups.append(open_group)
        elif label.startswith("I-") and open_group is not None:
            open_group[1].append(position)
        else:
            # "O", or an "I-" with nothing to continue, closes the entity.
            open_group = None

    # Pass 2: turn each group of positions into a surface string + offsets.
    entities = []
    cursor = 0
    for entity_type, positions in groups:
        if offsets is not None:
            start = offsets[positions[0]][0]
            end = offsets[positions[-1]][1]
            surface = text[start:end]
            # Trim any whitespace the tokenizer folded into the span.
            start += len(surface) - len(surface.lstrip())
            surface = surface.strip()
            end = start + len(surface)
        else:
            pieces = tokenizer.convert_ids_to_tokens(
                [input_ids[position] for position in positions]
            )
            surface = tokenizer.convert_tokens_to_string(pieces).strip()
            # Best effort: locate the surface form left to right.
            start = text.find(surface, cursor) if surface else -1
            end = start + len(surface) if start >= 0 else -1

        if not surface:
            continue
        if start >= 0:
            cursor = end
        entities.append(
            {"entity": entity_type, "text": surface, "start": start, "end": end}
        )
    return entities

def highlight_entities(text, entities):
    """Return *text* as an HTML paragraph with entity spans wrapped in <mark>.

    The text is user-supplied, so every segment is HTML-escaped before being
    placed in the output. Highlighting works on character spans of the
    original text rather than repeated string replacement, so previously
    inserted markup is never re-matched and marks are never nested.

    Args:
        text: The original input string.
        entities: Iterable of dicts with a ``"text"`` key. If an entity also
            carries integer ``"start"``/``"end"`` offsets that really
            delimit its text, only that span is marked; otherwise every
            occurrence of the entity text is marked.

    Returns:
        An HTML string of the form ``<p>...</p>``.
    """
    from html import escape  # local import keeps this block self-contained

    spans = []
    for entity in entities:
        surface = entity["text"]
        if not surface:
            continue  # an empty needle would match between every character
        start, end = entity.get("start"), entity.get("end")
        if (
            isinstance(start, int)
            and isinstance(end, int)
            and 0 <= start < end
            and text[start:end] == surface
        ):
            spans.append((start, end))
            continue
        # Offsets missing or unreliable: fall back to every occurrence.
        found = text.find(surface)
        while found != -1:
            spans.append((found, found + len(surface)))
            found = text.find(surface, found + len(surface))

    pieces = []
    cursor = 0
    for start, end in sorted(spans):
        if end <= cursor:
            continue  # duplicate of, or fully inside, an emitted span
        start = max(start, cursor)  # clip partial overlaps instead of nesting
        pieces.append(escape(text[cursor:start], quote=False))
        pieces.append(
            '<mark style="background-color: yellow;">'
            + escape(text[start:end], quote=False)
            + "</mark>"
        )
        cursor = end
    pieces.append(escape(text[cursor:], quote=False))
    return "<p>" + "".join(pieces) + "</p>"

def log_demo_usage(text, num_entities):
    """Print a one-line usage record to stdout.

    The record contains at most the first 50 characters of *text*, always
    followed by ``...``, and the number of entities found.
    """
    preview = text[:50]
    print(f"Processed text: {preview}... | Entities found: {num_entities}")

# Gradio interface: a multi-line text box in, rendered HTML out, with
# process_text as the handler and two clickable example inputs.
demo = gr.Interface(
    title="French Healthcare NER Demo | As featured in 'NLP on OCI'",
    description="""
    🔬 Live demo of the French Healthcare NER model built in Chapter 5 of 'NLP on OCI'
    
    📚 Follow along with the book to build this exact model step-by-step
    🏥 Perfect for medical text analysis, clinical studies, and healthcare compliance
    ⚡ Powered by Oracle Cloud Infrastructure
    
    By [Hicham Assoudi] - Oracle Consultant & AI Researcher
    """,
    fn=process_text,
    inputs=gr.Textbox(
        lines=5,
        label="Paste French medical text",
        placeholder="Le patient présente une hypertension artérielle...",
    ),
    outputs=gr.HTML(label="Identified Medical Entities"),
    examples=[
        ["Le patient souffre d'hypertension et diabète de type 2. Traitement: Metformine 500mg."],
        ["Antécédents: infarctus du myocarde en 2019. Allergie à la pénicilline."],
    ],
)

# Marketing panel: a Markdown blurb for the book plus an email field and button.
# NOTE(review): in the code visible here, no click handler is attached to
# `submit_btn` and `marketing_elements` is never launched or mounted into
# `demo` — confirm whether this panel is wired up elsewhere.
with gr.Blocks() as marketing_elements:
    # Static promotional copy; "your-book-link" looks like an unfilled
    # placeholder URL — TODO confirm.
    gr.Markdown("""
    ### 📖 Get the Complete Guide
    
    Learn how to build and deploy this exact model in 'NLP on OCI'
    - ✓ Step-by-step implementation
    - ✓ Performance optimization
    - ✓ Enterprise deployment patterns
    - ✓ Complete source code
    
    [Get the Book](your-book-link) | Use code `NERSPACE` for 15% off
    """)
    
    # Email capture row: text box and button placed side by side.
    with gr.Row():
        email_input = gr.Textbox(
            label="Get the French Healthcare NER Dataset",
            placeholder="Enter your business email"
        )
        submit_btn = gr.Button("Access Dataset")

# Launch the Gradio demo only when this file is run as a script, not on import.
# NOTE(review): only `demo` is launched; `marketing_elements` is not — confirm intent.
if __name__ == "__main__":
    demo.launch()