Spaces:

impresso-project
/

multilingual-named-entity-recognition

Running

App Files Community

emanuelaboros committed on Oct 16, 2024

Commit

5436b2b

1 Parent(s): b5d1f19

Let's highlight some entities

Browse files

Files changed (1) hide show

app.py +35 -24

app.py CHANGED Viewed

@@ -7,34 +7,40 @@ MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"
 # Load the tokenizer and model using the pipeline
 ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-ner_pipeline = pipeline("generic-ner", model=MODEL_NAME,
-                        tokenizer=ner_tokenizer,
-                        trust_remote_code=True,
-                        device='cpu')
-# Helper function to print entities nicely
-def print_nicely(entities):
-    entity_details = []
-    for entity in entities:
-        entity_info = f"Entity: {entity['entity']} | Confidence: {entity['score']:.2f}% | Text: {entity['word'].strip()} | Start: {entity['start']} | End: {entity['end']}"
-        entity_details.append(entity_info)
-    return "\n".join(entity_details)
 # Function to process the sentence and extract entities
 def extract_entities(sentence):
     results = ner_pipeline(sentence)
-    entity_results = []
-    # Extract and format the entities
-    for key in results.keys():
-        entity_results.append(print_nicely(results[key]))
-    return "\n".join(entity_results)
 # Create Gradio interface
 def ner_app_interface():
-    input_sentence = gr.Textbox(lines=5, label="Input Sentence", placeholder="Enter a sentence for NER...")
-    output_entities = gr.Textbox(label="Extracted Entities")
     # Interface definition
     interface = gr.Interface(
@@ -42,12 +48,17 @@ def ner_app_interface():
         inputs=input_sentence,
         outputs=output_entities,
         title="Named Entity Recognition",
-        description="Enter a sentence to extract named entities using the NER model from the Impresso project."
     )
     interface.launch()
 # Run the app
 if __name__ == "__main__":
     ner_app_interface()

 # Load the tokenizer and model using the pipeline
 ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ner_pipeline = pipeline(
+    "generic-ner",
+    model=MODEL_NAME,
+    tokenizer=ner_tokenizer,
+    trust_remote_code=True,
+    device="cpu",
+)
 # Function to process the sentence and extract entities
 def extract_entities(sentence):
     results = ner_pipeline(sentence)
+    entities_with_confidences = []
+    # Extract and format the entities for highlighting
+    for entity in results:
+        entities_with_confidences.append(
+            (
+                entity["word"],
+                entity["start"],
+                entity["end"],
+                f"{entity['entity']} ({entity['score']:.2f}%)",
+            )
+        )
+    return {"text": sentence, "entities": entities_with_confidences}
 # Create Gradio interface
 def ner_app_interface():
+    input_sentence = gr.Textbox(
+        lines=5, label="Input Sentence", placeholder="Enter a sentence for NER..."
+    )
+    output_entities = gr.HighlightedText(label="Extracted Entities")
     # Interface definition
     interface = gr.Interface(
         inputs=input_sentence,
         outputs=output_entities,
         title="Named Entity Recognition",
+        description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
+        examples=[
+            [
+                "In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles."
+            ]
+        ],
     )
     interface.launch()
 # Run the app
 if __name__ == "__main__":
     ner_app_interface()