File size: 3,865 Bytes
ba99ac9
 
 
 
 
4d544f0
ba99ac9
 
 
 
4d544f0
ba99ac9
 
 
 
 
 
 
 
 
ff08140
ba99ac9
 
ff08140
 
543d322
ff08140
 
ba99ac9
 
 
 
 
 
 
 
 
 
 
 
 
543d322
ba99ac9
 
 
 
 
 
 
 
 
 
 
543d322
f3f70d0
ba99ac9
f3f70d0
 
 
 
 
 
ff08140
f3f70d0
ff08140
f3f70d0
 
ba99ac9
 
 
f3f70d0
ba99ac9
 
 
f3f70d0
ba99ac9
 
f3f70d0
ff08140
 
ba99ac9
ff08140
f3f70d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba99ac9
 
f3f70d0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import sentencepiece
import streamlit as st
import pandas as pd
import spacy
import roner

# Sample Romanian inputs offered in the "Select from Examples" dropdown:
# one short sentence and one longer text repeating that sentence.
example_list = [
    "Ana merge în București.",
    """Ana merge în București. Ana merge în București. Ana merge în București. Ana merge în București. Ana merge în București. Ana merge în București."""
]

# Page-level configuration and title.
st.set_page_config(layout="wide")

st.title("Demo for Romanian NER")

# Checkpoints offered in the sidebar model selector (currently a single entry).
model_list = ['dumitrescustefan/bert-base-romanian-ner']

st.sidebar.header("Select NER Model")
# NOTE(review): model_checkpoint is assigned here but not read anywhere else
# in the visible code; setModel() does not take it as an argument.
model_checkpoint = st.sidebar.radio("", model_list)

st.sidebar.write("This demo is based on RoNER: 'https://github.com/dumitrescustefan/roner'")
st.sidebar.write("")


st.sidebar.header("Select type of PERSON detection")
# The Run handler checks whether this choice starts with "Proper" to derive
# the named_persons_only flag passed to setModel().
named_persons_only_radio = st.sidebar.radio("", ('Proper nouns only', 'All nouns'))

st.sidebar.write("Types of entities detected: 'PERSON', 'ORG', 'GPE', 'LOC', 'NAT_REL_POL', 'EVENT', 'LANGUAGE', 'WORK_OF_ART', 'DATETIME', 'PERIOD', 'MONEY', 'QUANTITY', 'NUMERIC', 'ORDINAL', 'FACILITY'")

# Main-area input: either pick one of the examples (pre-filled into a text
# area) or type/paste new text. Both branches bind the result to input_text.
st.subheader("Select Text Input Method")
input_method = st.radio("", ('Select from Examples', 'Write or Paste New Text'))
if input_method == 'Select from Examples':
    selected_text = st.selectbox('Select Text from List', example_list, index=0, key=1)
    st.subheader("Text to Run")
    # Both text areas use key=2; only one branch executes per script run.
    input_text = st.text_area("Selected Text", selected_text, height=128, max_chars=None, key=2)
elif input_method == "Write or Paste New Text":
    st.subheader("Text to Run")
    input_text = st.text_area('Write or Paste Text Below', value="", height=128, max_chars=None, key=2)

@st.cache(allow_output_mutation=True)
def setModel(named_persons_only):
    """Create the RoNER tagger used by the demo.

    The call is wrapped in ``st.cache`` so that Streamlit can reuse the
    returned object for a given ``named_persons_only`` value instead of
    constructing it again.

    Args:
        named_persons_only: Forwarded unchanged to ``roner.NER``.

    Returns:
        The ``roner.NER`` instance.
    """
    return roner.NER(named_persons_only=named_persons_only)

@st.cache(allow_output_mutation=True)
def get_html(html: str):
    """Wrap markup in a bordered, horizontally scrollable ``<div>``.

    Every newline in ``html`` is replaced by a single space before the
    markup is inserted into the wrapper.

    Args:
        html: Markup to embed.

    Returns:
        The wrapper ``<div>`` string containing the flattened markup.
    """
    # Splitting on "\n" and re-joining with " " swaps each newline for a space.
    single_line = " ".join(html.split("\n"))
    template = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
    return template.format(single_line)
        
# Run handler: when the button is pressed, tag the current input text and
# show the recognized entities both as a table and as highlighted text.
Run_Button = st.button("Run", key=None)
if Run_Button:
    if not input_text.strip():
        # Nothing to tag: avoid running the model on empty input and
        # indexing into its result.
        st.warning("Please enter some text to run the model on.")
    else:
        # "Proper nouns only" -> named_persons_only=True, "All nouns" -> False.
        ner = setModel(named_persons_only=named_persons_only_radio.startswith("Proper"))
        # A single text is submitted, so only the first result is used.
        output = ner(input_text)[0]

        # Keep only words carrying an entity tag ("O" marks non-entities);
        # the same filtered list feeds both the table and the highlighting.
        tagged_words = [word for word in output["words"] if word["tag"] != "O"]

        # Tabular form. Columns are named explicitly so the table keeps its
        # headers even when no entity was found.
        data = [
            {
                "word": word["text"],
                "entity": word["tag"],
                "start_char": word["start_char"],
                "end_char": word["end_char"],
            }
            for word in tagged_words
        ]
        df = pd.DataFrame(data, columns=["word", "entity", "start_char", "end_char"])
        st.subheader("Recognized Entities")
        st.dataframe(df)

        st.subheader("Spacy Style Display")
        # Input dict for displaCy's manual mode: character spans plus the text.
        spacy_display = {
            "ents": [
                {"start": word["start_char"], "end": word["end_char"], "label": word["tag"]}
                for word in tagged_words
            ],
            "text": output["text"],
            "title": None,
        }

        entity_list = ['PERSON', 'ORG', 'GPE', 'LOC', 'NAT_REL_POL',
                       'EVENT', 'LANGUAGE', 'WORK_OF_ART', 'DATETIME',
                       'PERIOD', 'MONEY', 'QUANTITY', 'NUMERIC',
                       'ORDINAL', 'FACILITY']
        # Every entity type is highlighted with the same color.
        colors = dict.fromkeys(entity_list, '#F00')
        html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True, options={"ents": entity_list, "colors": colors})
        style = "<style>mark.entity { display: inline-block }</style>"
        st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)