import html
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForTokenClassification, AutoTokenizer

# Module-level cache for the NER model and its tokenizer. Both start as None
# and are assigned by load_healthcare_ner() whenever either is still None.
model = None
tokenizer = None

def load_healthcare_ner():
    """Load the Healthcare NER model and tokenizer, reusing them once loaded.

    When either module-level ``model`` or ``tokenizer`` is still ``None``,
    this logs in to the Hugging Face Hub with the token from the ``HF_TOKEN``
    environment variable and loads both objects from the
    ``TypicaAI/HealthcareNER-Fr`` repository. Otherwise the cached objects
    are returned unchanged.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        KeyError: If ``HF_TOKEN`` is not set in the environment.
    """
    global model, tokenizer
    if model is None or tokenizer is None:
        repo_id = "TypicaAI/HealthcareNER-Fr"
        hf_token = os.environ["HF_TOKEN"]
        login(token=hf_token)
        # ``token`` is the current name of the deprecated ``use_auth_token``
        # argument. It is passed to both loaders so the tokenizer files are
        # requested with the same credentials as the model weights.
        model = AutoModelForTokenClassification.from_pretrained(
            repo_id,
            token=hf_token,
        )
        tokenizer = AutoTokenizer.from_pretrained(repo_id, token=hf_token)
    return model, tokenizer

def process_text(text):
    """Run NER on *text* and return it as HTML with the entities highlighted.

    Args:
        text: The raw text submitted by the user. ``None`` and ``""`` are
            treated as empty input.

    Returns:
        The HTML string produced by ``highlight_entities``.
    """
    if not text:
        # Nothing to analyse: produce the same empty paragraph and usage line
        # as before, without loading or calling the model.
        log_demo_usage("", 0)
        return highlight_entities("", [])

    model, tokenizer = load_healthcare_ner()
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: disable autograd so no computation graph is recorded
    # for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    # Decode entities from outputs
    entities = extract_entities(outputs, tokenizer, text)

    # Highlight entities in the text
    html_output = highlight_entities(text, entities)

    # Log usage
    log_demo_usage(text, len(entities))

    return html_output

def extract_entities(outputs, tokenizer, text):
    """Decode the model's BIO tag predictions into entities located in *text*.

    Label ids are translated with the module-level ``model.config.id2label``.
    A ``B-`` label opens an entity, a following ``I-`` label extends the open
    entity, and any other label closes it. Special tokens are skipped.

    Args:
        outputs: Token-classification output; ``outputs.logits`` is expected
            to be indexed as (batch, position, label). Only the first
            sequence of the batch is read.
        tokenizer: The tokenizer that produced the model input for *text*.
        text: The original input string.

    Returns:
        A list of dicts in order of appearance, each with the keys
        ``"entity"`` (label without its ``B-``/``I-`` prefix), ``"text"``,
        ``"start"`` and ``"end"``. With a fast tokenizer ``start``/``end``
        are character offsets such that ``text[start:end] == entity["text"]``.
        With a slow tokenizer the text is rebuilt from the token pieces and
        located by search; ``start`` and ``end`` are ``-1`` if it cannot be
        found verbatim.
    """
    # Index the batch dimension explicitly instead of squeeze(), which would
    # also drop a sequence dimension of size 1.
    predictions = torch.argmax(outputs.logits, dim=-1)[0].tolist()
    id2label = model.config.id2label

    # Re-encode the text the same way the model input was built (same
    # truncation, special tokens included) so that position i here matches
    # position i of the logits. tokenizer.tokenize() leaves out the special
    # tokens, which shifted every label by one position.
    has_offsets = bool(getattr(tokenizer, "is_fast", False))
    encoding = tokenizer(
        text,
        truncation=True,
        return_offsets_mapping=has_offsets,
    )
    input_ids = encoding["input_ids"]
    offsets = encoding["offset_mapping"] if has_offsets else None
    special_ids = set(tokenizer.all_special_ids)

    # First pass: group token positions into (entity_type, [positions]).
    groups = []
    current = None
    for position, (token_id, prediction) in enumerate(
        zip(input_ids, predictions)
    ):
        if token_id in special_ids:
            continue
        label = id2label[prediction]
        if label.startswith("B-"):
            current = (label[2:], [position])
            groups.append(current)
        elif label.startswith("I-") and current is not None:
            current[1].append(position)
        else:
            current = None

    # Second pass: turn each group of positions into a span of the text.
    entities = []
    search_from = 0
    for entity_type, positions in groups:
        if offsets is not None:
            start = offsets[positions[0]][0]
            end = offsets[positions[-1]][1]
            # Some tokenizers include the preceding space in a token's span.
            while start < end and text[start].isspace():
                start += 1
            while end > start and text[end - 1].isspace():
                end -= 1
            surface = text[start:end]
        else:
            # Slow tokenizers cannot report offsets: rebuild the surface form
            # from the pieces and look it up after the previous entity.
            pieces = tokenizer.convert_ids_to_tokens(
                [input_ids[position] for position in positions]
            )
            surface = tokenizer.convert_tokens_to_string(pieces).strip()
            start = text.find(surface, search_from) if surface else -1
            if start == -1:
                end = -1
            else:
                end = start + len(surface)
                search_from = end
        if surface:
            entities.append(
                {
                    "entity": entity_type,
                    "text": surface,
                    "start": start,
                    "end": end,
                }
            )
    return entities

def highlight_entities(text, entities):
    """Return *text* as an HTML paragraph with entities wrapped in ``<mark>``.

    The input text is HTML-escaped, so markup typed by the user is displayed
    literally instead of being interpreted.

    Args:
        text: The original input string.
        entities: Iterable of dicts with a ``"text"`` key and optionally
            integer ``"start"``/``"end"`` character offsets. When the offsets
            are present and ``text[start:end]`` equals the entity text, only
            that span is highlighted. Otherwise every occurrence of the
            entity text is highlighted. Entities with empty text are ignored.

    Returns:
        A string of the form ``<p>...</p>``.
    """
    spans = []
    for entity in entities:
        surface = entity.get("text") or ""
        start = entity.get("start")
        end = entity.get("end")
        if (
            isinstance(start, int)
            and isinstance(end, int)
            and 0 <= start < end <= len(text)
            and text[start:end] == surface
        ):
            spans.append((start, end))
        elif surface:
            # No usable offsets: mark every occurrence of the entity text.
            found = text.find(surface)
            while found != -1:
                spans.append((found, found + len(surface)))
                found = text.find(surface, found + len(surface))

    # Build the output in a single left-to-right pass over the original text
    # so inserted markup is never searched or rewritten by a later entity.
    pieces = []
    cursor = 0
    # At equal start, the longest span comes first and shorter ones are then
    # dropped as overlaps.
    for start, end in sorted(set(spans), key=lambda span: (span[0], -span[1])):
        if start < cursor:
            continue  # overlaps a span that is already highlighted
        pieces.append(html.escape(text[cursor:start], quote=False))
        pieces.append('<mark style="background-color: yellow;">')
        pieces.append(html.escape(text[start:end], quote=False))
        pieces.append("</mark>")
        cursor = end
    pieces.append(html.escape(text[cursor:], quote=False))
    return f"<p>{''.join(pieces)}</p>"

def log_demo_usage(text, num_entities):
    """Print a one-line usage record for a processed request.

    The record contains at most the first 50 characters of *text*, always
    followed by ``...``, and the number of entities found.

    NOTE(review): the preview is user-submitted text written to stdout;
    confirm this is acceptable for the kind of text the demo receives.
    """
    preview = text[:50]
    print(f"Processed text: {preview}... | Entities found: {num_entities}")
# Define the Gradio interface: one multi-line textbox is passed to
# process_text, and the HTML string it returns is shown in an HTML component.
demo = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(
        label="Paste French medical text",
        placeholder="Le patient présente une hypertension artérielle...",
        lines=5
    ),
    outputs=gr.HTML(label="Identified Medical Entities"),
    title="French Healthcare NER Demo | As featured in 'NLP on OCI'",
    description="""
    🔬 Live demo of the French Healthcare NER model built in Chapter 5 of 'NLP on OCI'
    
    📚 Follow along with the book to build this exact model step-by-step
    🏥 Perfect for medical text analysis, clinical studies, and healthcare compliance
    ⚡ Powered by Oracle Cloud Infrastructure
    
    By [Hicham Assoudi] - Oracle Consultant & AI Researcher
    """,
    # Sample inputs; each inner list holds the value for the single textbox.
    examples=[
        ["Le patient souffre d'hypertension et diabète de type 2. Traitement: Metformine 500mg."],
        ["Antécédents: infarctus du myocarde en 2019. Allergie à la pénicilline."]
    ]
)

# Add marketing elements
# NOTE(review): in the visible code this layout is only bound to
# `marketing_elements`; the entry point launches `demo` alone and no click
# handler is attached to `submit_btn`, so this section is presumably neither
# displayed nor functional yet - confirm.
# NOTE(review): `your-book-link` looks like an unfilled placeholder URL - confirm.
with gr.Blocks() as marketing_elements:
    gr.Markdown("""
    ### 📖 Get the Complete Guide
    
    Learn how to build and deploy this exact model in 'NLP on OCI'
    - ✓ Step-by-step implementation
    - ✓ Performance optimization
    - ✓ Enterprise deployment patterns
    - ✓ Complete source code
    
    [Get the Book](your-book-link) | Use code `NERSPACE` for 15% off
    """)
    
    # Email capture row: a textbox next to a button.
    with gr.Row():
        email_input = gr.Textbox(
            label="Get the French Healthcare NER Dataset",
            placeholder="Enter your business email"
        )
        submit_btn = gr.Button("Access Dataset")

# Launch the Gradio demo only when this file is run as a script; importing
# the module defines `demo` without calling launch().
if __name__ == "__main__":
    demo.launch()