Spaces:

Beehzod
/

named_entity_recognizer

Sleeping

App Files Community

Beehzod committed on Nov 13, 2024

Commit

bfa4c9d

verified ·

1 Parent(s): c0fa9a7

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -11

app.py CHANGED Viewed

@@ -1,24 +1,73 @@
-# import streamlit as st
 # from transformers import pipeline
-# pipe = pipeline("text-classification", model="Beehzod/smart-finetuned-ner")
 # text = st.text_area('enter text: ')
-# if text:
-#     out = pipe(text)
-#     st.json(out)
 import streamlit as st
 from transformers import pipeline
 # Load the model from the Hugging Face Hub
 ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
-# Example predictions
-text = st.text_area('enter text: ')
-results = ner_pipeline(text)
-for entity in results:
-    print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
-    st.json(entity)

+# import streamlit as st
 # from transformers import pipeline
+# # Load the model from the Hugging Face Hub
+# ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
+# # Example predictions
 # text = st.text_area('enter text: ')
+# results = ner_pipeline(text)
+# for entity in results:
+#     print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
+#     st.json(entity)
 import streamlit as st
 from transformers import pipeline
 # Load the model from the Hugging Face Hub
 ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
+# Helper function to combine subword tokens
+def merge_entities(entities):
+    merged_entities = []
+    current_entity = {"word": "", "entity": None, "score": 0.0, "start": None, "end": None}
+    for token in entities:
+        # Check if it's a new entity or a continuation of the current one
+        if token['entity'].startswith('B-') or (current_entity['entity'] and token['entity'] != current_entity['entity']):
+            # Add the current entity to the list if it exists
+            if current_entity['entity']:
+                current_entity['score'] /= current_entity['count']  # average the score
+                del current_entity['count']  # remove helper key
+                merged_entities.append(current_entity)
+            # Start a new entity
+            current_entity = {
+                "word": token['word'].replace("##", ""),
+                "entity": token['entity'],
+                "score": token['score'],
+                "start": token['start'],
+                "end": token['end'],
+                "count": 1  # for averaging score later
+            }
+        else:
+            # Continue adding to the current entity
+            current_entity["word"] += token['word'].replace("##", "")
+            current_entity["end"] = token['end']
+            current_entity["score"] += token['score']
+            current_entity["count"] += 1
+    # Add the last entity
+    if current_entity['entity']:
+        current_entity['score'] /= current_entity['count']
+        del current_entity['count']
+        merged_entities.append(current_entity)
+    return merged_entities
+# Get input text
+text = st.text_area('Enter text: ')
+# Run NER model if there is input text
+if text:
+    results = ner_pipeline(text)
+    # Merge entities for clean output
+    merged_results = merge_entities(results)
+    # Display merged results
+    for entity in merged_results:
+        st.write(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
+        st.json(entity)