Spaces:

TypicaAI
/

HealthcareNER-Fr

Sleeping

App Files Files Community

hassoudi committed on Dec 31, 2024

Commit

b9cfeca

verified ·

1 Parent(s): f33c0ca

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -11

app.py CHANGED Viewed

@@ -1,23 +1,79 @@
 import gradio as gr
 from huggingface_hub import login
 import os
 def load_healthcare_ner():
-    login(token=os.environ["HF_TOKEN"])
-    model = AutoModelForTokenClassification.from_pretrained(
-        "TypicaAI/HealthcareNER-Fr",
-        token=os.environ["HF_TOKEN"]
-    )
-    return model
 def process_text(text):
-    entities = model(text)
-    # Format results with highlighting
     html_output = highlight_entities(text, entities)
-    # Track usage for marketing insights
     log_demo_usage(text, len(entities))
     return html_output
 demo = gr.Interface(
     fn=process_text,
     inputs=gr.Textbox(
@@ -42,7 +98,7 @@ demo = gr.Interface(
     ]
 )
-# Add conversion elements
 with gr.Blocks() as marketing_elements:
     gr.Markdown("""
     ### 📖 Get the Complete Guide
@@ -61,4 +117,9 @@ with gr.Blocks() as marketing_elements:
             label="Get the French Healthcare NER Dataset",
             placeholder="Enter your business email"
         )
-        submit_btn = gr.Button("Access Dataset")

 import gradio as gr
 from huggingface_hub import login
+from transformers import AutoModelForTokenClassification, AutoTokenizer
 import os
+import torch
# Lazily-initialised singletons: filled in by the first call to
# load_healthcare_ner() and reused by every later request.
model = None
tokenizer = None


def load_healthcare_ner():
    """Load the Healthcare NER model and tokenizer once and return them.

    The first call authenticates against the Hugging Face Hub with the
    ``HF_TOKEN`` environment variable and downloads both artefacts; later
    calls return the cached module-level objects.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        KeyError: If ``HF_TOKEN`` is not set in the environment.
    """
    global model, tokenizer
    if model is None or tokenizer is None:
        hf_token = os.environ["HF_TOKEN"]
        login(token=hf_token)
        # `token=` is the supported keyword; `use_auth_token=` is deprecated
        # in transformers. The tokenizer gets the same credential so both
        # downloads authenticate the same way.
        loaded_model = AutoModelForTokenClassification.from_pretrained(
            "TypicaAI/HealthcareNER-Fr",
            token=hf_token,
        )
        loaded_tokenizer = AutoTokenizer.from_pretrained(
            "TypicaAI/HealthcareNER-Fr",
            token=hf_token,
        )
        model, tokenizer = loaded_model, loaded_tokenizer
    return model, tokenizer
 def process_text(text):
     """Run the NER model on *text* and return it as highlighted HTML.

     Args:
         text: Raw input string from the Gradio textbox.

     Returns:
         The HTML string produced by ``highlight_entities``.
     """
     model, tokenizer = load_healthcare_ner()
     inputs = tokenizer(text, return_tensors="pt", truncation=True)
     # Inference only: disable autograd so no gradient graph is built.
     with torch.no_grad():
         outputs = model(**inputs)
     # Decode entities from outputs
     entities = extract_entities(outputs, tokenizer, text)
     # Highlight entities in the text
     html_output = highlight_entities(text, entities)
     # Log usage
     log_demo_usage(text, len(entities))
     return html_output
def extract_entities(outputs, tokenizer, text, id2label=None):
    """Convert token-classification logits into a list of entity dicts.

    The text is re-encoded with character offsets so that each prediction
    is paired with the token at the same position (special tokens
    included) and each entity is reported as a slice of the original text.

    Args:
        outputs: Model output exposing ``logits``; only the first batch row
            is read.
        tokenizer: Callable tokenizer that accepts
            ``return_offsets_mapping=True`` and ``truncation=True``.
            NOTE(review): Hugging Face "fast" tokenizers support offsets;
            confirm the deployed tokenizer is one.
        text: The raw string that produced ``outputs``.
        id2label: Optional mapping from class id to BIO label. When
            omitted, the module-level ``model.config.id2label`` is used.

    Returns:
        A list of dicts with keys ``"entity"`` (label without the ``B-``
        prefix), ``"text"`` (the matching slice of *text*), ``"start"`` and
        ``"end"`` (character offsets into *text*, end exclusive).
    """
    if id2label is None:
        id2label = model.config.id2label

    # Encode with the same truncation as the forward pass so positions line
    # up one-to-one with the logits.
    encoding = tokenizer(text, return_offsets_mapping=True, truncation=True)
    offsets = encoding["offset_mapping"]
    # Index the batch row explicitly; squeeze() would collapse a
    # one-position sequence into a scalar.
    predictions = outputs.logits.argmax(dim=-1)[0].tolist()

    spans = []
    current = None
    for (start, end), prediction in zip(offsets, predictions):
        if start == end:
            # Zero-width span: a special token with no source characters.
            continue
        label = id2label[prediction]
        if label.startswith("B-"):
            if current is not None:
                spans.append(current)
            current = {"entity": label[2:], "start": start, "end": end}
        elif label.startswith("I-") and current is not None:
            # Extend the open entity to cover this token.
            current["end"] = end
        elif current is not None:
            spans.append(current)
            current = None
    if current is not None:
        spans.append(current)

    return [
        {
            "entity": span["entity"],
            "text": text[span["start"]:span["end"]],
            "start": span["start"],
            "end": span["end"],
        }
        for span in spans
    ]
def highlight_entities(text, entities):
    """Return *text* as an HTML paragraph with entities wrapped in ``<mark>``.

    All text is HTML-escaped before being embedded, and the markup is built
    in a single left-to-right pass so a repeated or overlapping entity can
    never be wrapped twice.

    Args:
        text: The raw input string.
        entities: Iterable of dicts with a ``"text"`` key. When an entity
            also carries integer ``"start"``/``"end"`` offsets that match
            its text, only that span is highlighted; otherwise every
            occurrence of its text is highlighted.

    Returns:
        An HTML string of the form ``<p>...</p>``.
    """
    import html  # function-scope import keeps this block self-contained

    mark_open = '<mark style="background-color: yellow;">'

    spans = set()
    for entity in entities:
        needle = entity["text"]
        if not needle:
            # An empty needle would match between every character.
            continue
        start = entity.get("start")
        end = entity.get("end")
        if (
            isinstance(start, int)
            and isinstance(end, int)
            and 0 <= start < end
            and text[start:end] == needle
        ):
            spans.add((start, end))
            continue
        # No usable offsets: highlight every occurrence of the text.
        pos = text.find(needle)
        while pos != -1:
            spans.add((pos, pos + len(needle)))
            pos = text.find(needle, pos + len(needle))

    pieces = []
    cursor = 0
    # Earliest start first; on ties the longer span wins.
    for start, end in sorted(spans, key=lambda span: (span[0], -span[1])):
        if start < cursor:
            # Overlaps a span that was already emitted.
            continue
        pieces.append(html.escape(text[cursor:start], quote=False))
        pieces.append(
            f"{mark_open}{html.escape(text[start:end], quote=False)}</mark>"
        )
        cursor = end
    pieces.append(html.escape(text[cursor:], quote=False))
    return "<p>" + "".join(pieces) + "</p>"
def log_demo_usage(text, num_entities):
    """Print a one-line usage record for analytics.

    Args:
        text: The processed input; at most its first 50 characters are
            printed.
        num_entities: Number of entities found in *text*.
    """
    # NOTE(review): the input may contain patient data; confirm that
    # printing a text preview is acceptable for this deployment.
    preview = text[:50]
    if len(text) > 50:
        # Add the ellipsis only when something was actually cut off.
        preview += "..."
    print(f"Processed text: {preview} | Entities found: {num_entities}")
+# Define the Gradio interface
 demo = gr.Interface(
     fn=process_text,
     inputs=gr.Textbox(
     ]
 )
+# Add marketing elements
 with gr.Blocks() as marketing_elements:
     gr.Markdown("""
     ### 📖 Get the Complete Guide
             label="Get the French Healthcare NER Dataset",
             placeholder="Enter your business email"
         )
+        submit_btn = gr.Button("Access Dataset")
+# Launch the Gradio demo
+if __name__ == "__main__":
+    demo.launch()