Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,6 @@
|
|
13 |
# print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
|
14 |
# st.json(entity)
|
15 |
|
16 |
-
|
17 |
import streamlit as st
|
18 |
from transformers import pipeline
|
19 |
|
@@ -23,51 +22,53 @@ ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
|
|
23 |
# Helper function to combine subword tokens
|
24 |
def merge_entities(entities):
|
25 |
merged_entities = []
|
26 |
-
current_entity =
|
27 |
-
|
28 |
for token in entities:
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
35 |
merged_entities.append(current_entity)
|
36 |
-
|
37 |
# Start a new entity
|
38 |
current_entity = {
|
39 |
-
"word":
|
40 |
"entity": token['entity'],
|
41 |
"score": token['score'],
|
42 |
"start": token['start'],
|
43 |
"end": token['end'],
|
44 |
-
"count": 1 # for
|
45 |
}
|
46 |
else:
|
47 |
-
# Continue
|
48 |
-
current_entity["word"] +=
|
49 |
current_entity["end"] = token['end']
|
50 |
current_entity["score"] += token['score']
|
51 |
current_entity["count"] += 1
|
52 |
|
53 |
-
# Add the last entity
|
54 |
-
if current_entity
|
55 |
current_entity['score'] /= current_entity['count']
|
56 |
del current_entity['count']
|
57 |
merged_entities.append(current_entity)
|
58 |
|
59 |
return merged_entities
|
60 |
|
61 |
-
#
|
62 |
-
text = st.text_area('Enter text:
|
63 |
|
64 |
-
# Run NER model
|
65 |
if text:
|
66 |
results = ner_pipeline(text)
|
67 |
-
# Merge entities for clean output
|
68 |
merged_results = merge_entities(results)
|
69 |
-
|
70 |
-
# Display
|
71 |
for entity in merged_results:
|
72 |
-
st.write(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
|
73 |
st.json(entity)
|
|
|
13 |
# print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
|
14 |
# st.json(entity)
|
15 |
|
|
|
16 |
import streamlit as st
|
17 |
from transformers import pipeline
|
18 |
|
|
|
# Helper function to combine subword tokens emitted by the HF "ner" pipeline
# (run without an aggregation_strategy) into whole-word entities.
def merge_entities(entities):
    """Merge consecutive sub-word token predictions into single entities.

    Parameters
    ----------
    entities : list[dict]
        Raw token-classification pipeline output; each dict carries the
        keys 'word', 'entity', 'score', 'start' and 'end'.

    Returns
    -------
    list[dict]
        Merged entities with 'word' (sub-words concatenated, '##' prefixes
        stripped), 'entity', 'score' (averaged over the merged tokens),
        'start' and 'end'. Empty input yields an empty list.
    """
    merged_entities = []
    current_entity = None

    for token in entities:
        token_text = token['word'].replace("##", "")  # strip WordPiece prefix

        # Start a new entity on a B- tag, on any label change, or — bug fix —
        # when no entity is open yet: an orphan I- token as the very first
        # prediction used to fall into the else-branch and raise TypeError
        # because current_entity was still None.
        if (current_entity is None
                or token['entity'].startswith('B-')
                or token['entity'] != current_entity['entity']):
            if current_entity:
                # Average the accumulated score over the merged sub-words.
                current_entity['score'] /= current_entity['count']
                del current_entity['count']
                merged_entities.append(current_entity)

            # Open a fresh entity for this token.
            current_entity = {
                "word": token_text,
                "entity": token['entity'],
                "score": token['score'],
                "start": token['start'],
                "end": token['end'],
                "count": 1,  # helper counter for score averaging
            }
        else:
            # Same label continues: extend the open entity in place.
            current_entity["word"] += token_text
            current_entity["end"] = token['end']
            current_entity["score"] += token['score']
            current_entity["count"] += 1

    # Flush the last open entity, if any.
    if current_entity:
        current_entity['score'] /= current_entity['count']
        del current_entity['count']
        merged_entities.append(current_entity)

    return merged_entities
|
62 |
|
# Streamlit front-end: collect free text, run the NER pipeline, and show
# each merged entity as a JSON card.
text = st.text_area('Enter text:')

if text:
    # Raw sub-word predictions from the model.
    results = ner_pipeline(text)

    # Collapse sub-word tokens into whole entities for clean output.
    merged_results = merge_entities(results)

    # Render the results.
    for entity in merged_results:
        #st.write(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
        st.json(entity)
|