Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,6 @@
|
|
13 |
# print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
|
14 |
# st.json(entity)
|
15 |
|
16 |
-
|
17 |
import streamlit as st
|
18 |
from transformers import pipeline
|
19 |
|
@@ -23,51 +22,53 @@ ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
|
|
23 |
# Helper function to combine subword tokens
|
24 |
def merge_entities(entities):
|
25 |
merged_entities = []
|
26 |
-
current_entity =
|
27 |
-
|
28 |
for token in entities:
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
35 |
merged_entities.append(current_entity)
|
36 |
-
|
37 |
# Start a new entity
|
38 |
current_entity = {
|
39 |
-
"word":
|
40 |
"entity": token['entity'],
|
41 |
"score": token['score'],
|
42 |
"start": token['start'],
|
43 |
"end": token['end'],
|
44 |
-
"count": 1 # for
|
45 |
}
|
46 |
else:
|
47 |
-
# Continue
|
48 |
-
current_entity["word"] +=
|
49 |
current_entity["end"] = token['end']
|
50 |
current_entity["score"] += token['score']
|
51 |
current_entity["count"] += 1
|
52 |
|
53 |
-
# Add the last entity
|
54 |
-
if current_entity
|
55 |
current_entity['score'] /= current_entity['count']
|
56 |
del current_entity['count']
|
57 |
merged_entities.append(current_entity)
|
58 |
|
59 |
return merged_entities
|
60 |
|
61 |
-
#
|
62 |
-
text = st.text_area('Enter text:
|
63 |
|
64 |
-
# Run NER model
|
65 |
if text:
|
66 |
results = ner_pipeline(text)
|
67 |
-
# Merge entities for clean output
|
68 |
merged_results = merge_entities(results)
|
69 |
-
|
70 |
-
# Display
|
71 |
for entity in merged_results:
|
72 |
-
st.write(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
|
73 |
st.json(entity)
|
|
|
13 |
# print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
|
14 |
# st.json(entity)
|
15 |
|
|
|
16 |
import streamlit as st
|
17 |
from transformers import pipeline
|
18 |
|
|
|
# Helper function to combine subword tokens emitted by the HF "ner" pipeline
# (run without an aggregation_strategy) into whole-word entities.
def merge_entities(entities):
    """Merge consecutive sub-word token predictions into single entities.

    Parameters
    ----------
    entities : list[dict]
        Raw token-classification pipeline output; each dict carries the
        keys 'word', 'entity', 'score', 'start' and 'end'.

    Returns
    -------
    list[dict]
        Merged entities with 'word' (sub-words concatenated, '##' prefixes
        stripped), 'entity', 'score' (averaged over the merged tokens),
        'start' and 'end'. Empty input yields an empty list.
    """
    merged_entities = []
    current_entity = None

    for token in entities:
        token_text = token['word'].replace("##", "")  # strip WordPiece prefix

        # Start a new entity on a B- tag, on any label change, or — bug fix —
        # when no entity is open yet: an orphan I- token as the very first
        # prediction used to fall into the else-branch and raise TypeError
        # because current_entity was still None.
        if (current_entity is None
                or token['entity'].startswith('B-')
                or token['entity'] != current_entity['entity']):
            if current_entity:
                # Average the accumulated score over the merged sub-words.
                current_entity['score'] /= current_entity['count']
                del current_entity['count']
                merged_entities.append(current_entity)

            # Open a fresh entity for this token.
            current_entity = {
                "word": token_text,
                "entity": token['entity'],
                "score": token['score'],
                "start": token['start'],
                "end": token['end'],
                "count": 1,  # helper counter for score averaging
            }
        else:
            # Same label continues: extend the open entity in place.
            current_entity["word"] += token_text
            current_entity["end"] = token['end']
            current_entity["score"] += token['score']
            current_entity["count"] += 1

    # Flush the last open entity, if any.
    if current_entity:
        current_entity['score'] /= current_entity['count']
        del current_entity['count']
        merged_entities.append(current_entity)

    return merged_entities
|
62 |
|
# Streamlit front-end: collect free text, run the NER pipeline, and show
# each merged entity as a JSON card.
text = st.text_area('Enter text:')

if text:
    # Raw sub-word predictions from the model.
    results = ner_pipeline(text)

    # Collapse sub-word tokens into whole entities for clean output.
    merged_results = merge_entities(results)

    # Render the results.
    for entity in merged_results:
        #st.write(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
        st.json(entity)
|