Beehzod committed on
Commit
18026b7
·
verified ·
1 Parent(s): bfa4c9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -23
app.py CHANGED
@@ -13,7 +13,6 @@
13
  # print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
14
  # st.json(entity)
15
 
16
-
17
  import streamlit as st
18
  from transformers import pipeline
19
 
@@ -23,51 +22,53 @@ ner_pipeline = pipeline("ner", model="Beehzod/smart-finetuned-ner")
23
  # Helper function to combine subword tokens
24
  def merge_entities(entities):
25
  merged_entities = []
26
- current_entity = {"word": "", "entity": None, "score": 0.0, "start": None, "end": None}
27
-
28
  for token in entities:
29
- # Check if it's a new entity or a continuation of the current one
30
- if token['entity'].startswith('B-') or (current_entity['entity'] and token['entity'] != current_entity['entity']):
31
- # Add the current entity to the list if it exists
32
- if current_entity['entity']:
33
- current_entity['score'] /= current_entity['count'] # average the score
34
- del current_entity['count'] # remove helper key
 
 
 
35
  merged_entities.append(current_entity)
36
-
37
  # Start a new entity
38
  current_entity = {
39
- "word": token['word'].replace("##", ""),
40
  "entity": token['entity'],
41
  "score": token['score'],
42
  "start": token['start'],
43
  "end": token['end'],
44
- "count": 1 # for averaging score later
45
  }
46
  else:
47
- # Continue adding to the current entity
48
- current_entity["word"] += token['word'].replace("##", "")
49
  current_entity["end"] = token['end']
50
  current_entity["score"] += token['score']
51
  current_entity["count"] += 1
52
 
53
- # Add the last entity
54
- if current_entity['entity']:
55
  current_entity['score'] /= current_entity['count']
56
  del current_entity['count']
57
  merged_entities.append(current_entity)
58
 
59
  return merged_entities
60
 
61
- # Get input text
62
- text = st.text_area('Enter text: ')
63
 
64
- # Run NER model if there is input text
65
  if text:
66
  results = ner_pipeline(text)
67
- # Merge entities for clean output
68
  merged_results = merge_entities(results)
69
-
70
- # Display merged results
71
  for entity in merged_results:
72
- st.write(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
73
  st.json(entity)
 
13
  # print(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
14
  # st.json(entity)
15
 
 
16
  import streamlit as st
17
  from transformers import pipeline
18
 
 
22
  # Helper function to combine subword tokens
def merge_entities(entities):
    """Merge consecutive token-level NER predictions into whole entities.

    Args:
        entities: Iterable of token dicts, each with the keys ``word``,
            ``entity``, ``score``, ``start`` and ``end``.

    Returns:
        A list of new dicts with the same five keys. ``word`` is the joined
        text of the merged tokens, ``entity`` is the tag of the first token
        in the group, ``score`` is the mean of the token scores, and
        ``start``/``end`` are taken from the first and last token.

    A token opens a new entity when it is the first token seen, when its tag
    starts with ``B-``, or when its label type (the tag with any ``B-``/``I-``
    prefix removed) differs from the entity being built. Otherwise it extends
    the current entity.
    """

    def label_type(tag):
        # "B-PER" and "I-PER" both map to "PER"; unprefixed tags are unchanged.
        return tag[2:] if tag.startswith(("B-", "I-")) else tag

    def finish(entity):
        # Turn the running score sum into a mean and drop the helper counter.
        entity["score"] /= entity["count"]
        del entity["count"]
        return entity

    merged_entities = []
    current_entity = None

    for token in entities:
        is_subword = token["word"].startswith("##")
        token_text = token["word"].replace("##", "")  # Remove subword prefix if any

        starts_new_entity = (
            # No entity is open yet, e.g. the sequence begins with an "I-" tag.
            current_entity is None
            or token["entity"].startswith("B-")
            # Compare label types, not raw tags, so "I-X" can continue "B-X".
            or label_type(token["entity"]) != label_type(current_entity["entity"])
        )

        if starts_new_entity:
            if current_entity is not None:
                merged_entities.append(finish(current_entity))
            current_entity = {
                "word": token_text,
                "entity": token["entity"],
                "score": token["score"],
                "start": token["start"],
                "end": token["end"],
                "count": 1,  # Helper count for score averaging
            }
            continue

        # Subword pieces ("##...") and tokens that begin exactly where the
        # previous one ended are glued on directly; anything else is a
        # separate word and needs a space.
        touches_previous = (
            token["start"] is not None
            and token["start"] == current_entity["end"]
        )
        separator = "" if (is_subword or touches_previous) else " "

        current_entity["word"] += separator + token_text
        current_entity["end"] = token["end"]
        current_entity["score"] += token["score"]
        current_entity["count"] += 1

    # Flush the entity still being built when the input ends.
    if current_entity is not None:
        merged_entities.append(finish(current_entity))

    return merged_entities
62
 
# Streamlit UI: read free text from the user.
text = st.text_area('Enter text:')

# Only call the model once the user has typed something.
if text:
    # Token-level predictions from the NER pipeline.
    results = ner_pipeline(text)
    # Collapse the token predictions into merged entities.
    merged_results = merge_entities(results)

    # Render each merged entity as a JSON widget.
    for entity in merged_results:
        st.json(entity)