Spaces:

TypicaAI
/

HealthcareNER-Fr

Running

App Files Files Community

hassoudi committed on Dec 31, 2024

Commit

a4f3cd1

verified ·

1 Parent(s): b9cfeca

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -45

app.py CHANGED Viewed

@@ -1,33 +1,28 @@
 import gradio as gr
 from huggingface_hub import login
-from transformers import AutoModelForTokenClassification, AutoTokenizer
 import os
-import torch
-# Initialize global model and tokenizer
-model = None
-tokenizer = None
-def load_healthcare_ner():
-    """Load the Healthcare NER model and tokenizer."""
-    global model, tokenizer
-    if model is None or tokenizer is None:
         login(token=os.environ["HF_TOKEN"])
-        model = AutoModelForTokenClassification.from_pretrained(
-            "TypicaAI/HealthcareNER-Fr",
-            use_auth_token=os.environ["HF_TOKEN"]
         )
-        tokenizer = AutoTokenizer.from_pretrained("TypicaAI/HealthcareNER-Fr")
-    return model, tokenizer
 def process_text(text):
     """Process input text and return highlighted entities."""
-    model, tokenizer = load_healthcare_ner()
-    inputs = tokenizer(text, return_tensors="pt", truncation=True)
-    outputs = model(**inputs)
-    # Decode entities from outputs
-    entities = extract_entities(outputs, tokenizer, text)
     # Highlight entities in the text
     html_output = highlight_entities(text, entities)
@@ -37,35 +32,14 @@ def process_text(text):
     return html_output
-def extract_entities(outputs, tokenizer, text):
-    """Extract entities from model outputs."""
-    tokens = tokenizer.tokenize(text)
-    predictions = torch.argmax(outputs.logits, dim=2).squeeze().tolist()
-    entities = []
-    current_entity = None
-    for token, prediction in zip(tokens, predictions):
-        label = model.config.id2label[prediction]
-        if label.startswith("B-"):
-            if current_entity:
-                entities.append(current_entity)
-            current_entity = {"entity": label[2:], "text": token, "start": len(text)}
-        elif label.startswith("I-") and current_entity:
-            current_entity["text"] += f" {token}"
-        elif current_entity:
-            entities.append(current_entity)
-            current_entity = None
-    if current_entity:
-        entities.append(current_entity)
-    return entities
 def highlight_entities(text, entities):
     """Highlight identified entities in the input text."""
     highlighted_text = text
     for entity in entities:
         highlighted_text = highlighted_text.replace(
-            entity["text"],
-            f'<mark style="background-color: yellow;">{entity["text"]}</mark>'
         )
     return f"<p>{highlighted_text}</p>"
@@ -122,4 +96,3 @@ with gr.Blocks() as marketing_elements:
 # Launch the Gradio demo
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 from huggingface_hub import login
+from transformers import pipeline
 import os
# Process-wide cache for the NER pipeline; filled on the first call to
# load_healthcare_ner_pipeline() so the model is only loaded once.
ner_pipeline = None


def load_healthcare_ner_pipeline():
    """Return the shared Healthcare NER pipeline, creating it on first use.

    On the first call this logs in to the Hugging Face Hub with the token
    from the ``HF_TOKEN`` environment variable and builds a
    ``token-classification`` pipeline for ``TypicaAI/HealthcareNER-Fr``.
    The result is stored in the module-level ``ner_pipeline`` and returned
    as-is on every later call.

    Returns:
        The cached pipeline object.

    Raises:
        KeyError: If ``HF_TOKEN`` is not set when the pipeline is first built.
    """
    global ner_pipeline
    if ner_pipeline is None:
        # Read the token once and reuse it for both the login and the download.
        hf_token = os.environ["HF_TOKEN"]
        login(token=hf_token)
        ner_pipeline = pipeline(
            "token-classification",
            model="TypicaAI/HealthcareNER-Fr",
            # `token` replaces the deprecated `use_auth_token` argument.
            token=hf_token,
            # Groups B- and I- tokens into entities
            aggregation_strategy="simple",
        )
    return ner_pipeline
 def process_text(text):
     """Process input text and return highlighted entities."""
+    pipeline = load_healthcare_ner_pipeline()
+    entities = pipeline(text)
     # Highlight entities in the text
     html_output = highlight_entities(text, entities)
     return html_output
 def highlight_entities(text, entities):
     """Highlight identified entities in the input text."""
     highlighted_text = text
     for entity in entities:
+        entity_text = entity["word"]
         highlighted_text = highlighted_text.replace(
+            entity_text,
+            f'<mark style="background-color: yellow;">{entity_text}</mark>'
         )
     return f"<p>{highlighted_text}</p>"
 # Launch the Gradio demo, but only when this file is run as a script
 # (the guard below is false when the module is imported).
 # NOTE(review): `demo` is not defined in the visible part of this file;
 # presumably it is created in a section elided from this view — confirm.
 if __name__ == "__main__":
     demo.launch()