"""Gradio demo that highlights medical entities in French text.

The entities come from the ``TypicaAI/HealthcareNER-Fr`` token-classification
model, which is downloaded lazily on the first request.
"""

import html
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForTokenClassification, AutoTokenizer

MODEL_ID = "TypicaAI/HealthcareNER-Fr"

# Initialize global model and tokenizer (populated by load_healthcare_ner()).
model = None
tokenizer = None


def load_healthcare_ner():
    """Load the Healthcare NER model and tokenizer, caching them in globals.

    The Hugging Face token is read from the ``HF_TOKEN`` environment
    variable. When it is unset the download is attempted anonymously, so the
    failure (if any) comes from the hub rather than from a bare ``KeyError``.

    Returns:
        The ``(model, tokenizer)`` pair.
    """
    global model, tokenizer
    if model is None or tokenizer is None:
        hf_token = os.environ.get("HF_TOKEN")
        if hf_token:
            login(token=hf_token)
        # Load into locals first so that a failure on the second download
        # does not leave the module with only one of the two globals set.
        loaded_model = AutoModelForTokenClassification.from_pretrained(
            MODEL_ID, token=hf_token
        )
        loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=hf_token)
        model, tokenizer = loaded_model, loaded_tokenizer
    return model, tokenizer


def process_text(text):
    """Process input text and return it as HTML with entities highlighted.

    Args:
        text: Raw text typed or pasted by the user.

    Returns:
        An HTML string; empty or whitespace-only input is returned without
        running the model.
    """
    if not text or not text.strip():
        return highlight_entities(text or "", [])

    model, tokenizer = load_healthcare_ner()
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Decode entities from outputs
    entities = extract_entities(outputs, tokenizer, text)

    # Highlight entities in the text
    html_output = highlight_entities(text, entities)

    # Log usage
    log_demo_usage(text, len(entities))

    return html_output


def extract_entities(outputs, tokenizer, text):
    """Extract entities from model outputs as character spans of ``text``.

    ``text`` is re-encoded with the same ``truncation=True`` setting used in
    ``process_text`` so that the offsets line up one-to-one with the
    predictions, special tokens included.

    Args:
        outputs: Model output exposing ``logits`` for a batch of one sequence.
        tokenizer: Tokenizer used to produce the model inputs. It must
            support ``return_offsets_mapping`` (a "fast" tokenizer).
        text: The original input text.

    Returns:
        A list of dicts with keys ``"entity"`` (label without its ``B-``/``I-``
        prefix), ``"text"`` (the exact substring of ``text``), ``"start"`` and
        ``"end"`` (character offsets, end exclusive).
    """
    ner_model, _ = load_healthcare_ner()
    id2label = ner_model.config.id2label

    encoding = tokenizer(text, truncation=True, return_offsets_mapping=True)
    offsets = encoding["offset_mapping"]
    # Index the single batch element explicitly instead of squeeze(), which
    # would also drop the sequence dimension if it happened to be 1.
    predictions = outputs.logits.argmax(dim=-1)[0].tolist()

    spans = []
    current_entity = None
    for (start, end), prediction in zip(offsets, predictions):
        if start == end:
            # Zero-width offsets (e.g. special tokens) map to no input text.
            continue
        label = id2label[prediction]
        if label.startswith("B-"):
            if current_entity:
                spans.append(current_entity)
            current_entity = {"entity": label[2:], "start": start, "end": end}
        elif label.startswith("I-") and current_entity:
            current_entity["end"] = end
        elif current_entity:
            spans.append(current_entity)
            current_entity = None
    if current_entity:
        spans.append(current_entity)

    entities = []
    for span in spans:
        start, end = span["start"], span["end"]
        # Some tokenizers count the whitespace before a word as part of the
        # token; trim it so the highlight covers only the word itself.
        while start < end and text[start].isspace():
            start += 1
        while end > start and text[end - 1].isspace():
            end -= 1
        if start < end:
            entities.append(
                {
                    "entity": span["entity"],
                    "text": text[start:end],
                    "start": start,
                    "end": end,
                }
            )
    return entities


def highlight_entities(text, entities):
    """Highlight identified entities in the input text.

    All user-supplied text is HTML-escaped before being placed in the markup,
    since the result is rendered by a ``gr.HTML`` component.

    Args:
        text: The original input text.
        entities: Dicts with ``"entity"``, ``"text"`` and ``"start"`` keys and
            optionally ``"end"`` (derived from ``"text"`` when missing).

    Returns:
        An HTML ``<div>`` in which every entity span is wrapped in a
        ``<mark>`` element whose ``title`` is the entity label.
    """
    spans = []
    for entity in entities:
        start = entity["start"]
        end = entity.get("end", start + len(entity["text"]))
        spans.append((start, min(end, len(text)), entity["entity"]))

    pieces = []
    cursor = 0
    for start, end, label in sorted(spans):
        if start < cursor or end <= start:
            # Skip empty spans and spans overlapping one already emitted.
            continue
        pieces.append(html.escape(text[cursor:start]))
        pieces.append(
            f'<mark title="{html.escape(label)}">'
            f"{html.escape(text[start:end])}</mark>"
        )
        cursor = end
    pieces.append(html.escape(text[cursor:]))

    body = "".join(pieces)
    # pre-wrap keeps the line breaks of the pasted text visible.
    return f'<div style="white-space: pre-wrap;">{body}</div>'


def log_demo_usage(text, num_entities):
    """Log demo usage for analytics.

    Prints the first 50 characters of ``text`` and the entity count.
    """
    # NOTE(review): this writes a prefix of the submitted medical text to
    # stdout; confirm that is acceptable for the deployment's privacy policy.
    print(f"Processed text: {text[:50]}... | Entities found: {num_entities}")


# Define the Gradio interface
demo = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(
        label="Paste French medical text",
        placeholder="Le patient présente une hypertension artérielle...",
        lines=5
    ),
    outputs=gr.HTML(label="Identified Medical Entities"),
    title="French Healthcare NER Demo | As featured in 'NLP on OCI'",
    description="""
    🔬 Live demo of the French Healthcare NER model built in Chapter 5 of 'NLP on OCI'
    📚 Follow along with the book to build this exact model step-by-step
    🏥 Perfect for medical text analysis, clinical studies, and healthcare compliance
    ⚡ Powered by Oracle Cloud Infrastructure
    By [Hicham Assoudi] - Oracle Consultant & AI Researcher
    """,
    examples=[
        ["Le patient souffre d'hypertension et diabète de type 2. Traitement: Metformine 500mg."],
        ["Antécédents: infarctus du myocarde en 2019. Allergie à la pénicilline."]
    ]
)

# Add marketing elements
# NOTE(review): this Blocks app is built but never launched (only `demo` is),
# and `submit_btn` has no click handler, so none of it is visible or active.
with gr.Blocks() as marketing_elements:
    gr.Markdown("""
    ### 📖 Get the Complete Guide
    Learn how to build and deploy this exact model in 'NLP on OCI'
    - ✓ Step-by-step implementation
    - ✓ Performance optimization
    - ✓ Enterprise deployment patterns
    - ✓ Complete source code
    [Get the Book](your-book-link) | Use code `NERSPACE` for 15% off
    """)
    with gr.Row():
        email_input = gr.Textbox(
            label="Get the French Healthcare NER Dataset",
            placeholder="Enter your business email"
        )
        submit_btn = gr.Button("Access Dataset")

# Launch the Gradio demo
if __name__ == "__main__":
    demo.launch()