File size: 4,565 Bytes
566200c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# Import libraries
import streamlit as st
from streamlit_extras.stylable_container import stylable_container
import nltk
nltk.download('punkt_tab')
from nltk.tokenize import word_tokenize
import re
import time
import pandas as pd
import numpy as np
from transformers import pipeline 




# Page header: title, attribution, license, and a short description box.
st.title(":blue[Named Entity Recognition (NER) and Question Answering (QA)]")
st.write("made by [nlpblogs](https://nlpblogs.com/)")
st.write("License [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/deed.en)")

intro_box = st.container(border=True)
intro_box.write("**With this demo app you can tag entities in your text and retrieve answers to your questions.**")

st.divider()

# Text input: a session-state-backed text area with a reset button.
def clear_text():
    """Callback for the "Clear text" button: blank out the text area widget."""
    st.session_state["text"] = ""


text = st.text_area(
    "Paste your text here and then press **Ctrl + Enter**. The length of your text should not exceed 500 words.",
    key="text",
)
st.button("Clear text", on_click=clear_text)



# Word limit: strip punctuation, tokenize, and halt the app when the text
# exceeds the allowed word count.
MAX_WORDS = 500

text1 = re.sub(r'[^\w\s]', '', text)  # drop punctuation so it is not counted as words
tokens = word_tokenize(text1)

st.divider()

number = MAX_WORDS  # kept under the original name for any later references

# NOTE: st.text_area always returns a str (never None), so the old
# `text is not None` guard was dead code and has been removed.
if len(tokens) > number:
    st.warning('The length of your text should not exceed 500 words.')
    st.stop()



# Sidebar: echo the submitted text and its word count inside a styled panel.
with st.sidebar:
    with stylable_container(
        key="test_button",
        css_styles="""
        button { 
            background-color: blue;
            border: 3px solid red;
            padding: 5px;
            color: white;
        }
        """,
    ):
        st.button("DEMO APP")
        st.write("**Original text**")
        original_box = st.container(border=True)
        original_box.write(text)
        st.write("Length", len(tokens))
    
# NER: run the token-classification model over the text and collect the
# detected entities into a DataFrame for display and download.

@st.cache_resource
def _load_ner_pipeline():
    """Load the NER model once and reuse it across Streamlit reruns.

    Without caching, the model was re-instantiated (and potentially
    re-downloaded) on every widget interaction.
    """
    return pipeline(model="huggingface-course/bert-finetuned-ner", aggregation_strategy="max")


df = pd.DataFrame()  # fallback so later uses of `df` can never hit a NameError
with st.spinner('Wait for it...'):
    time.sleep(5)  # original demo delay, kept so the spinner stays visible
    if text is not None:
        token_classifier = _load_ner_pipeline()
        # Use a distinct name: the original shadowed the word-count `tokens`.
        entities = token_classifier(text)
        df = pd.DataFrame(entities)

properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
df_styled = df.style.set_properties(**properties)




        
st.subheader(":red[Named Entity Recognition (NER)]", divider="red")

tab1, tab2 = st.tabs(["Entities", "Explanations"])

with tab1:
    # Styled table of the extracted entities.
    st.table(df_styled)

with tab2:
    # Glossary of entity tags and of the result-table columns.
    for glossary_line in (
        "**PER** Person's name",
        "**LOC** Location name",
        "**ORG** Organization name",
        "**MISC** Miscellaneous",
        "**entity_group** This is the tag that has been assigned to an entity.",
        "**score** This indicates the confidence level that a tag has been assigned to an entity.",
        "**word** This is the entity that has been extracted from your text data.",
        "**start** This is the index of the start of the corresponding entity in your text data.",
        "**end** This is the index of the end of the corresponding entity in your text data",
    ):
        st.write(glossary_line)



# Download: bundle the NER results plus a glossary of tags into an
# in-memory zip archive.

import zipfile
import io

# One-row glossary DataFrame explaining each tag / result column in the export.
dfa = pd.DataFrame(
    data={
        'PER': ['Person'],
        'ORG': ['Organization'],
        'LOC': ['Location'],
        'MISC': ['Miscellaneous'],
        'entity_group': ['tag'],
        'score': ['confidence level'],
        'word': ['entity'],
        'start': ['index of the start of the corresponding entity'],
        'end': ['index of the end of the corresponding entity'],
    }
)

buf = io.BytesIO()

# Mode "w" (was "x"): exclusive-creation semantics are meaningless for a
# fresh in-memory buffer; "w" is the correct mode for writing a new archive.
with zipfile.ZipFile(buf, "w") as myzip:
    if text is not None:  # st.text_area returns str, so this always holds
        myzip.writestr("Summary of the results.csv", df.to_csv())
        myzip.writestr("Glossary of tags.csv", dfa.to_csv())
  



# Offer the zipped CSV results for download via a custom-styled button.
with stylable_container(
    key="button",
    css_styles="""
        button { 
            background-color: blue;
            border: 3px solid red;
            padding: 5px;
            color: white;
        }
        """,
):
    st.download_button(
        label="Download zip file",
        data=buf.getvalue(),
        file_name="zip file.zip",
        mime="application/zip",
    )
              


    
st.divider()

# QA
st.subheader(":red[Question Answering (QA)]", divider="red")


@st.cache_resource
def _load_qa_pipeline():
    """Load the QA model once and reuse it across Streamlit reruns.

    Without caching, the model was re-instantiated (and potentially
    re-downloaded) on every widget interaction, e.g. each keystroke.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


qa = _load_qa_pipeline()





def clear_question():
    """Callback for the "Clear question" button: blank out the question box."""
    st.session_state["question"] = ""


question = st.text_input("Ask a question:", key="question")
st.button("Clear question", on_click=clear_question)




# Run QA on demand. An empty question would make the pipeline raise an
# unhandled exception, so guard for it and show a friendly warning instead.
if st.button("Get Answer"):
    if not question.strip():
        st.warning('Please ask a question first.')
    else:
        # Keyword arguments make the question/context roles unambiguous.
        answer = qa(question=question, context=text)
        st.divider()
        st.write("Answer:", answer['answer'])
        st.write("Score:", answer['score'] * 100)