Spaces:

TypicaAI
/

HealthcareNER-Fr

Running

File size: 3,223 Bytes

a350303
f33c0ca
a4f3cd1
f33c0ca
b9cfeca
a4f3cd1
 
a350303
a4f3cd1
 
 
 
e8c9626
a4f3cd1
 
 
e8c9626
a4f3cd1
b9cfeca
a4f3cd1
f33c0ca
 
b9cfeca
a4f3cd1
 
b9cfeca
 
f33c0ca
b9cfeca
 
f33c0ca
b9cfeca
f33c0ca
a350303
b9cfeca
 
 
 
a4f3cd1
b9cfeca
a4f3cd1
 
b9cfeca
 
 
 
 
 
 
 
9d5574c
 
 
f33c0ca
 
 
9d5574c
f33c0ca
 
9d5574c
f33c0ca
9d5574c
f33c0ca
 
 
9d5574c
f33c0ca
9d5574c
f33c0ca
 
 
 
 
 
b9cfeca
f33c0ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9cfeca

import gradio as gr
from huggingface_hub import login
from transformers import pipeline
import os

# Module-wide cache for the NER pipeline; stays None until the first request.
ner_pipeline = None

def load_healthcare_ner_pipeline():
    """Return the shared Healthcare NER pipeline, building it on first use.

    On the first call the Hugging Face access token is read from the
    ``HFTOKEN`` environment variable, used to log in to the Hub, and passed
    to the ``pipeline`` factory for ``TypicaAI/HealthcareNER-Fr``. The result
    is stored in the module-level ``ner_pipeline`` and returned as-is by
    every later call.

    Returns:
        The token-classification pipeline cached in ``ner_pipeline``.

    Raises:
        KeyError: If ``HFTOKEN`` is not set when the pipeline is first built.
    """
    global ner_pipeline
    if ner_pipeline is None:
        # Read the secret once, up front, so a missing variable fails before
        # any Hub call and login/download are guaranteed to use the same value.
        hf_token = os.environ["HFTOKEN"]
        login(token=hf_token)
        ner_pipeline = pipeline(
            "token-classification",
            model="TypicaAI/HealthcareNER-Fr",
            # `token` is the supported replacement for the deprecated
            # `use_auth_token` keyword.
            token=hf_token,
            aggregation_strategy="simple",  # Groups B- and I- tokens into entities
        )
    return ner_pipeline

def process_text(text):
    """Run the NER model over *text* and return the highlighted HTML.

    The shared pipeline is fetched (and created if needed), applied to the
    input, and its entities are passed to ``highlight_entities``. The request
    is logged through ``log_demo_usage`` before the markup is returned.
    """
    # Named `recognizer` so the imported `pipeline` factory is not shadowed.
    recognizer = load_healthcare_ner_pipeline()
    found_entities = recognizer(text)

    markup = highlight_entities(text, found_entities)
    log_demo_usage(text, len(found_entities))
    return markup

def highlight_entities(text, entities):
    """Wrap each recognised entity of *text* in a yellow ``<mark>`` tag.

    Entities are located by their ``start``/``end`` character offsets, so
    exactly the recognised span is highlighted: other occurrences of the same
    word are left alone, repeated entities are never wrapped twice, and a
    ``word`` that differs from the raw text does not matter. An entity
    without offsets falls back to the next occurrence of its ``word``.
    Every piece copied from *text* is HTML-escaped so user input cannot
    inject markup into the rendered output.

    Args:
        text: The raw input string.
        entities: Iterable of dicts carrying ``start`` and ``end`` offsets
            into *text* and/or a ``word`` string.

    Returns:
        The annotated text as an HTML string of the form ``<p>...</p>``.
    """
    from html import escape  # local import keeps this block self-contained

    if not text:
        return "<p></p>"

    # Step 1: resolve every entity to a valid (start, end) span.
    spans = []
    search_from = 0
    for entity in entities:
        start, end = entity.get("start"), entity.get("end")
        if start is None or end is None:
            word = entity.get("word") or ""
            if not word:
                continue
            # Prefer the next occurrence after the previous entity, then any.
            start = text.find(word, search_from)
            if start == -1:
                start = text.find(word)
            if start == -1:
                continue
            end = start + len(word)
        if 0 <= start < end <= len(text):
            spans.append((start, end))
            search_from = max(search_from, end)

    # Step 2: stitch the output together left to right, escaping as we go.
    pieces = []
    cursor = 0
    for start, end in sorted(spans):
        if start < cursor:
            # Overlaps a span that is already highlighted; nesting <mark>
            # tags would only corrupt the markup.
            continue
        pieces.append(escape(text[cursor:start], quote=False))
        pieces.append(
            '<mark style="background-color: yellow;">'
            + escape(text[start:end], quote=False)
            + "</mark>"
        )
        cursor = end
    pieces.append(escape(text[cursor:], quote=False))

    body = "".join(pieces)
    return f"<p>{body}</p>"

def log_demo_usage(text, num_entities):
    """Print a one-line usage record for analytics.

    The record contains the first 50 characters of *text* followed by
    ``...`` and the number of entities that were found.
    """
    preview = text[:50]
    record = f"Processed text: {preview}... | Entities found: {num_entities}"
    print(record)

# Define the Gradio interface, assembled from named parts.

# Multi-line box where the user pastes the text to analyse.
_demo_input = gr.Textbox(
    label="Paste French medical text",
    placeholder="Le patient présente une hypertension artérielle...",
    lines=5
)

# HTML panel that shows the markup returned by process_text.
_demo_output = gr.HTML(label="Identified Medical Entities")

# Blurb passed as the interface description.
_demo_description = """
    🔬 Live demo of the French Healthcare NER model built in Chapter 5 of 'NLP on OCI'
    
    📚 Follow along with the book to build this exact model step-by-step
    🏥 Perfect for medical text analysis, clinical studies, and healthcare compliance
    ⚡ Powered by Oracle Cloud Infrastructure
    
    By [Hicham Assoudi] - Oracle Consultant & AI Researcher
    """

# Sample inputs offered by the interface; one single-field row per sample.
_demo_examples = [
    ["Le patient souffre d'hypertension et diabète de type 2. Traitement: Metformine 500mg."],
    ["Antécédents: infarctus du myocarde en 2019. Allergie à la pénicilline."]
]

demo = gr.Interface(
    fn=process_text,
    inputs=_demo_input,
    outputs=_demo_output,
    title="French Healthcare NER Demo | As featured in 'NLP on OCI'",
    description=_demo_description,
    examples=_demo_examples
)

# Add marketing elements
# Separate Blocks layout: a Markdown promo panel followed by a row holding an
# e-mail textbox and a button.
# NOTE(review): in the code visible here, `marketing_elements` is never
# rendered inside `demo` nor launched, and `submit_btn` has no click handler
# attached — confirm whether this UI is wired up elsewhere.
# NOTE(review): `your-book-link` in the Markdown below looks like a
# placeholder URL — confirm before publishing.
with gr.Blocks() as marketing_elements:
    gr.Markdown("""
    ### 📖 Get the Complete Guide
    
    Learn how to build and deploy this exact model in 'NLP on OCI'
    - ✓ Step-by-step implementation
    - ✓ Performance optimization
    - ✓ Enterprise deployment patterns
    - ✓ Complete source code
    
    [Get the Book](your-book-link) | Use code `NERSPACE` for 15% off
    """)
    
    # Components created inside the Row context belong to that row.
    with gr.Row():
        email_input = gr.Textbox(
            label="Get the French Healthcare NER Dataset",
            placeholder="Enter your business email"
        )
        submit_btn = gr.Button("Access Dataset")

# Launch the Gradio demo
# Runs only when this file is executed as a script; `demo` is the only
# interface started here.
if __name__ == "__main__":
    demo.launch()