File size: 3,840 Bytes
ba99ac9
 
 
 
 
4d544f0
ba99ac9
 
 
 
4d544f0
ba99ac9
 
 
 
 
 
 
 
 
ff08140
ba99ac9
 
ff08140
 
 
 
 
ba99ac9
 
 
 
 
 
 
 
 
 
 
 
 
766d339
ba99ac9
 
 
 
 
 
 
 
 
 
 
 
f3f70d0
ba99ac9
f3f70d0
 
 
 
 
 
ff08140
f3f70d0
ff08140
f3f70d0
 
ba99ac9
 
 
f3f70d0
ba99ac9
 
 
f3f70d0
ba99ac9
 
f3f70d0
ff08140
 
ba99ac9
ff08140
f3f70d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba99ac9
 
f3f70d0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import sentencepiece
import streamlit as st
import pandas as pd
import spacy
import roner

# Sample texts offered in the "Select from Examples" drop-down.
example_list = [
    "Ana merge în București.",
    """Ana merge în București. Ana merge în București. Ana merge în București. Ana merge în București. Ana merge în București. Ana merge în București.""",
]

# Labels of the two input modes; they are compared against the radio's return value below.
EXAMPLES_MODE = 'Select from Examples'
PASTE_MODE = "Write or Paste New Text"

# --- Page layout and title -------------------------------------------------
st.set_page_config(layout="wide")

st.title("Demo for Romanian NER")

# --- Sidebar: model choice (a single checkpoint is listed) ------------------
model_list = ['dumitrescustefan/bert-base-romanian-ner']

st.sidebar.header("Select NER Model")
model_checkpoint = st.sidebar.radio("", model_list)

st.sidebar.write("This demo is based on RoNER: 'https://github.com/dumitrescustefan/roner'")
st.sidebar.write("")


# --- Sidebar: how PERSON entities should be detected ------------------------
st.sidebar.header("Select type of PERSON detection")
named_persons_only = st.sidebar.radio("", ('Proper nouns only', 'All nouns'))

st.sidebar.write("Types of entities detected: 'PERSON', 'ORG', 'GPE', 'LOC', 'NAT_REL_POL', 'EVENT', 'LANGUAGE', 'WORK_OF_ART', 'DATETIME', 'PERIOD', 'MONEY', 'QUANTITY', 'NUMERIC', 'ORDINAL', 'FACILITY'")

# --- Main pane: choose an example or type free text -------------------------
st.subheader("Select Text Input Method")
input_method = st.radio("", (EXAMPLES_MODE, PASTE_MODE))
if input_method == EXAMPLES_MODE:
    # The chosen example pre-fills the text area below.
    selected_text = st.selectbox(
        'Select Text from List',
        options=example_list,
        index=0,
        key=1,
    )
    st.subheader("Text to Run")
    input_text = st.text_area(
        "Selected Text",
        value=selected_text,
        height=128,
        max_chars=None,
        key=2,
    )
elif input_method == PASTE_MODE:
    # Free-text mode starts from an empty text area.
    st.subheader("Text to Run")
    input_text = st.text_area(
        'Write or Paste Text Below',
        value="",
        height=128,
        max_chars=None,
        key=2,
    )

@st.cache(allow_output_mutation=True)
def setModel(named_persons_only):
    """Build the RoNER tagger for the requested PERSON-detection mode.

    Args:
        named_persons_only: Either one of the sidebar labels (a string; a
            label starting with "Proper" turns the option on, anything else
            turns it off) or a plain truthy/falsy flag such as a bool.

    Returns:
        The ``roner.NER`` instance created with the resolved
        ``named_persons_only`` flag.
    """
    if isinstance(named_persons_only, str):
        proper_nouns_only = named_persons_only.startswith("Proper")
    else:
        # Callers may pass a bool directly; calling ``.startswith`` on it
        # would raise AttributeError, so use its truth value instead.
        proper_nouns_only = bool(named_persons_only)
    return roner.NER(named_persons_only=proper_nouns_only)

@st.cache(allow_output_mutation=True)
def get_html(html: str):
    """Wrap markup in a bordered, horizontally scrollable ``<div>``.

    Every newline in ``html`` is replaced by a single space before the
    markup is inserted into the wrapper.
    """
    container_template = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
    single_line_markup = " ".join(html.split("\n"))
    return container_template.format(single_line_markup)
        
# Run the tagger when the button is pressed, then show the entities both as a
# table and as highlighted text rendered with spaCy's displacy.
Run_Button = st.button("Run", key=None)
if Run_Button:
    if not input_text or not input_text.strip():
        # Nothing to tag; avoid calling the model and indexing its result.
        st.warning("Please enter some text before pressing Run.")
    else:
        # Forward the sidebar selection so the PERSON-detection choice is
        # honoured (a hard-coded False both ignored it and is not a string).
        ner = setModel(named_persons_only=named_persons_only)
        output = ner(input_text)[0]  # a single text was submitted; use its result

        # Words tagged "O" are not part of any entity and are skipped.
        entity_words = [word for word in output["words"] if word["tag"] != "O"]

        # Tabular form. Columns are named explicitly so the header is still
        # shown when no entity was recognized.
        table_columns = ["word", "entity", "start_char", "end_char"]
        data = [
            {
                "word": word["text"],
                "entity": word["tag"],
                "start_char": word["start_char"],
                "end_char": word["end_char"],
            }
            for word in entity_words
        ]
        df = pd.DataFrame(data, columns=table_columns)
        st.subheader("Recognized Entities")
        st.dataframe(df)

        st.subheader("Spacy Style Display")
        # Input dict for displacy's manual mode: raw text plus character spans.
        spacy_display = {
            "text": output["text"],
            "ents": [
                {
                    "start": word["start_char"],
                    "end": word["end_char"],
                    "label": word["tag"],
                }
                for word in entity_words
            ],
            "title": None,
        }

        entity_list = [
            'PERSON', 'ORG', 'GPE', 'LOC', 'NAT_REL_POL',
            'EVENT', 'LANGUAGE', 'WORK_OF_ART', 'DATETIME',
            'PERIOD', 'MONEY', 'QUANTITY', 'NUMERIC',
            'ORDINAL', 'FACILITY',
        ]
        # Every entity label is highlighted with the same colour.
        colors = dict.fromkeys(entity_list, '#F00')

        html = spacy.displacy.render(
            spacy_display,
            style="ent",
            minify=True,
            manual=True,
            options={"ents": entity_list, "colors": colors},
        )
        style = "<style>mark.entity { display: inline-block }</style>"
        st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)