Stefan Dumitrescu committed on
Commit
ff08140
·
1 Parent(s): f3f70d0
Files changed (1) hide show
  1. app.py +11 -23
app.py CHANGED
@@ -18,24 +18,14 @@ model_list = ['dumitrescustefan/bert-base-romanian-ner']
18
  st.sidebar.header("Select NER Model")
19
  model_checkpoint = st.sidebar.radio("", model_list)
20
 
21
- st.sidebar.write("For details of models: 'https://huggingface.co/dumitrescustefan/")
22
  st.sidebar.write("")
23
 
24
- xlm_agg_strategy_info = "'aggregation_strategy' can be selected as 'simple' or 'none' for 'xlm-roberta' because of the RoBERTa model's tokenization approach."
25
-
26
- st.sidebar.header("Select Aggregation Strategy Type")
27
- if model_checkpoint == "akdeniz27/xlm-roberta-base-turkish-ner":
28
- aggregation = st.sidebar.radio("", ('simple', 'none'))
29
- st.sidebar.write(xlm_agg_strategy_info)
30
- elif model_checkpoint == "xlm-roberta-large-finetuned-conll03-english":
31
- aggregation = st.sidebar.radio("", ('simple', 'none'))
32
- st.sidebar.write(xlm_agg_strategy_info)
33
- st.sidebar.write("")
34
- st.sidebar.write("This English NER model is included just to show the zero-shot transfer learning capability of XLM-Roberta.")
35
- else:
36
- aggregation = st.sidebar.radio("", ('first', 'simple', 'average', 'max', 'none'))
37
-
38
- st.sidebar.write("Please refer 'https://huggingface.co/transformers/_modules/transformers/pipelines/token_classification.html' for entity grouping with aggregation_strategy parameter.")
39
 
40
  st.subheader("Select Text Input Method")
41
  input_method = st.radio("", ('Select from Examples', 'Write or Paste New Text'))
@@ -70,10 +60,9 @@ if Run_Button == True:
70
  if word["tag"]!="O":
71
  data.append({
72
  "word": word["text"],
73
- "tag": word["tag"],
74
  "start_char": word["start_char"],
75
- "end_char": word["end_char"],
76
- "span_after": word["span_after"],
77
  })
78
  df = pd.DataFrame.from_dict(data)
79
  st.subheader("Recognized Entities")
@@ -87,15 +76,14 @@ if Run_Button == True:
87
  spacy_display["title"] = None
88
 
89
  for word in output["words"]:
90
- #spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
91
- spacy_display["ents"].append({"start": word["start_char"], "end": word["end_char"], "label": word["tag"]})
92
 
93
- entity_list = ['O', 'PERSON', 'ORG', 'GPE', 'LOC', 'NAT_REL_POL',
94
  'EVENT', 'LANGUAGE', 'WORK_OF_ART', 'DATETIME',
95
  'PERIOD', 'MONEY', 'QUANTITY', 'NUMERIC',
96
  'ORDINAL', 'FACILITY']
97
  colors = {
98
- 'O': '#FFF',
99
  'PERSON': '#F00',
100
  'ORG': '#F00',
101
  'GPE': '#F00',
 
18
  st.sidebar.header("Select NER Model")
19
  model_checkpoint = st.sidebar.radio("", model_list)
20
 
21
+ st.sidebar.write("This demo is based on RoNER: 'https://github.com/dumitrescustefan/roner'")
22
  st.sidebar.write("")
23
 
24
+
25
+ st.sidebar.header("Select type of PERSON detection")
26
+ named_persons_only = st.sidebar.radio("", ('Proper nouns only', 'All nouns'))
27
+
28
+ st.sidebar.write("Types of entities detected: 'PERSON', 'ORG', 'GPE', 'LOC', 'NAT_REL_POL', 'EVENT', 'LANGUAGE', 'WORK_OF_ART', 'DATETIME', 'PERIOD', 'MONEY', 'QUANTITY', 'NUMERIC', 'ORDINAL', 'FACILITY'")
 
 
 
 
 
 
 
 
 
 
29
 
30
  st.subheader("Select Text Input Method")
31
  input_method = st.radio("", ('Select from Examples', 'Write or Paste New Text'))
 
60
  if word["tag"]!="O":
61
  data.append({
62
  "word": word["text"],
63
+ "entity": word["tag"],
64
  "start_char": word["start_char"],
65
+ "end_char": word["end_char"]
 
66
  })
67
  df = pd.DataFrame.from_dict(data)
68
  st.subheader("Recognized Entities")
 
76
  spacy_display["title"] = None
77
 
78
  for word in output["words"]:
79
+ if word["tag"]!="O":
80
+ spacy_display["ents"].append({"start": word["start_char"], "end": word["end_char"], "label": word["tag"]})
81
 
82
+ entity_list = ['PERSON', 'ORG', 'GPE', 'LOC', 'NAT_REL_POL',
83
  'EVENT', 'LANGUAGE', 'WORK_OF_ART', 'DATETIME',
84
  'PERIOD', 'MONEY', 'QUANTITY', 'NUMERIC',
85
  'ORDINAL', 'FACILITY']
86
  colors = {
 
87
  'PERSON': '#F00',
88
  'ORG': '#F00',
89
  'GPE': '#F00',