nlpblogs committed on
Commit b00a147 · verified · 1 Parent(s): aff8600

Create app.py

Files changed (1)
  1. app.py +168 -0
app.py ADDED
@@ -0,0 +1,168 @@
+ import streamlit as st
+ import pandas as pd
+ from streamlit_extras.stylable_container import stylable_container
+ import time
+ import zipfile
+ import io
+ import re
+ import nltk
+ nltk.download('punkt_tab')
+ from nltk.tokenize import word_tokenize
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
+ from transformers import pipeline
+
+
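+ # Sidebar: a styled demo button and a checkbox glossary of the NER tags.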
+ with st.sidebar:
+     with stylable_container(
+         key="test_button",
+         css_styles="""
+             button {
+                 background-color: #0000ff;
+                 border: none;
+                 color: white;
+             }
+             """,
+     ):
+         st.button("DEMO APP")
+
+     st.subheader("Glossary of tags", divider="red")
+
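+     # Each checkbox reveals a plain-language definition of its tag.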
+     per = st.checkbox("PER")
+     if per:
+         st.write("Person's name")
+
+     org = st.checkbox("ORG")
+     if org:
+         st.write("Organization")
+
+     loc = st.checkbox("LOC")
+     if loc:
+         st.write("Location")
+
+     b_per = st.checkbox("B-PER")
+     if b_per:
+         st.write("Beginning of a person's name right after another person's name")
+
+     b_org = st.checkbox("B-ORG")
+     if b_org:
+         st.write("Beginning of an organization right after another organization")
+
+     b_loc = st.checkbox("B-LOC")
+     if b_loc:
+         st.write("Beginning of a location right after another location")
+
+     o = st.checkbox("O")
+     if o:
+         st.write("Outside of a named entity")
+
+
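+ # Main panel: title, text input and results.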
+ st.subheader(":blue[AI Entity Extractor]")
+
+ st.divider()
+
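+ # The input widget stores its value under the "text" session-state key,
+ # so the "Clear text" button below can reset it through a callback.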
+ def clear_text():
+     # Callback for the "Clear text" button: reset the input widget's state.
+     st.session_state["text"] = ""
+
+ text = st.text_input("Paste your text here and then press **enter**. The length of your text should not exceed 2000 words.", key="text")
+ st.button("Clear text", on_click=clear_text)
+ st.write(text)
+
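+ # Rough word count: strip punctuation, then tokenize with NLTK.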
+ text1 = re.sub(r'[^\w\s]', '', text)
+ tokens = word_tokenize(text1)
+ st.write("Length", len(tokens))
+ st.divider()
+
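+ # Enforce the 2000-word limit; st.stop() halts this script run early.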
+ number = 2000
+
+ if len(tokens) > number:
+     st.warning('The length of your text should not exceed 2000 words.')
+     st.stop()
+
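+ # Run the NER pipeline. aggregation_strategy="simple" merges subword pieces
+ # into whole entities, each with an entity_group label (e.g. PER, ORG, LOC).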
+ @st.cache_resource
+ def load_token_classifier():
+     # Cache the model so it is loaded once, not on every Streamlit rerun.
+     return pipeline(model="Davlan/bert-base-multilingual-cased-ner-hrl", aggregation_strategy="simple")
+
+ if text:
+     token_classifier = load_token_classifier()
+     entities = token_classifier(text)
+
+     df = pd.DataFrame(entities)
+     # Drop stray subword fragments such as "##s" left over from tokenization.
+     df = df.drop(df[df['word'] == '##s'].index)
+
+ dfa = pd.DataFrame(
+     data={
+         'PER': ['Person'],
+         'ORG': ['Organization'],
+         'LOC': ['Location'],
+         'B-PER': ['Beginning of a person's name right after another person's name'],
+         'B-ORG': ['Beginning of an organization right after another organization'],
+         'B-LOC': ['Beginning of a location right after another location'],
+         'O': ['Outside of a named entity']
+     }
+ )
+
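+ # Bundle the results table and the tag glossary into an in-memory zip.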
+ buf = io.BytesIO()
+
+ with zipfile.ZipFile(buf, "w") as myzip:
+     if text:
+         myzip.writestr("Summary of the results.csv", df.to_csv())
+     myzip.writestr("Glossary of tags.csv", dfa.to_csv())
+
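+ # Two tabs: one to inspect the results, one to download them.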
+ tab1, tab2 = st.tabs(["Summarize", "Download"])
+
+ with tab1:
+     if text:
+         st.dataframe(df, width=1000)
+
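+ # Serve the zip built above as a single download.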
+ with tab2:
+     st.download_button(
+         label="Download zip file",
+         data=buf.getvalue(),
+         file_name="zip file.zip",
+         mime="application/zip",
+     )