nlpblogs's picture
Create app.py
b00a147 verified
raw
history blame
3.22 kB
import streamlit as st
import pandas as pd
from streamlit_extras.stylable_container import stylable_container
import time
import zipfile
import io
import nltk
# Fetch the NLTK 'punkt_tab' resource when the script starts. Presumably this is
# the tokenizer data that the word_tokenize call further down relies on —
# confirm against the NLTK version in use.
nltk.download('punkt_tab')
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import re
# CSS applied to the button rendered inside the "test_button" container.
DEMO_BUTTON_CSS = """
button {
background-color: #0000ff;
border: none;
color: white;
}
"""

# (checkbox label, explanation) pairs, in the order they appear in the sidebar.
TAG_GLOSSARY = (
    ("I", "Person's name"),
    ("ORG", "Organization"),
    ("LOC", "Location"),
    ("B-PER", "Beginning of a person’s name right after another person’s name"),
    ("B-ORG", "Beginning of an organisation right after another organization"),
    ("B-LOC", "Beginning of a location right after another location"),
    ("O", "Outside of a named entity"),
)

# Sidebar: a styled demo button followed by the tag glossary. Each tag is a
# checkbox; ticking it writes the tag's explanation directly underneath.
with st.sidebar:
    with stylable_container(key="test_button", css_styles=DEMO_BUTTON_CSS):
        st.button("DEMO APP")

    st.subheader("Glossary of tags", divider="red")
    for tag_label, tag_meaning in TAG_GLOSSARY:
        if st.checkbox(tag_label):
            st.write(tag_meaning)
def clear_text():
    """Blank the ``"text"`` entry in session state.

    Used as the ``on_click`` callback of the "Clear text" button; the text
    input below is bound to the same ``"text"`` key.
    """
    st.session_state["text"] = ""


# Page heading.
st.subheader(":blue[AI Entity Extractor]")
st.divider()

# Text entry, a button that empties it, and an echo of the current input.
INPUT_PROMPT = (
    "Paste your text here and then press **enter**. "
    "The length of your text should not exceed 2000 words."
)
text = st.text_input(INPUT_PROMPT, key="text")
st.button("Clear text", on_click=clear_text)
st.write(text)
from nltk.tokenize import word_tokenize

# Maximum number of word tokens accepted before the run is halted.
word_limit = 2000

# Count words on a copy of the input with every character that is neither a
# word character nor whitespace removed, then show the count to the user.
punctuation_free = re.sub(r'[^\w\s]', '', text)
tokens = word_tokenize(punctuation_free)
st.write("Length", len(tokens))
st.divider()

# Over the limit: warn and stop this script run before any further processing.
over_limit = len(tokens) > word_limit
if text is not None and over_limit:
    st.warning('The length of your text should not exceed 2000 words.')
    st.stop()
@st.cache_resource
def _load_token_classifier():
    """Build the NER pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes the script on every widget interaction; caching the
    pipeline as a resource avoids re-instantiating the model each time.

    Returns:
        The ``transformers`` pipeline for the
        ``Davlan/bert-base-multilingual-cased-ner-hrl`` model, configured with
        ``aggregation_strategy="simple"``.
    """
    return pipeline(
        model="Davlan/bert-base-multilingual-cased-ner-hrl",
        aggregation_strategy="simple",
    )


# The text input yields an empty string (not None) until something is typed,
# so check for real content before invoking the model.
if text is not None and text.strip():
    entities = _load_token_classifier()(text)
else:
    entities = []

# One row per detected entity. ``df`` is always defined so that the later
# export and display steps work even when there is nothing to show.
df = pd.DataFrame(entities)

# With no entities the frame has no columns at all; indexing "word" would
# raise KeyError, so only filter out the stray '##s' pieces when it exists.
if "word" in df.columns:
    df = df.drop(df[df['word'] == '##s'].index)
import zipfile
import io
# Glossary of tags exported alongside the results: one column per tag, with a
# single row holding that tag's explanation.
glossary = {
    'I': 'Person',
    'ORG': 'Organization',
    'LOC': 'Location',
    'B-PER': 'Beginning of a person’s name right after another person’s name',
    'B-ORG': 'Beginning of an organisation right after another organization ',
    'B-LOC': 'Beginning of a location right after another location',
    'O': 'Outside of a named entity ',
}
dfa = pd.DataFrame({tag: [meaning] for tag, meaning in glossary.items()})
# Assemble the downloadable archive in memory: the results table and the tag
# glossary, each serialised as CSV under its own member name.
buf = io.BytesIO()
with zipfile.ZipFile(buf, mode="x") as archive:
    if text is not None:
        for member_name, frame in (
            ("Summary of the results.csv", df),
            ("Glossary of tags.csv", dfa),
        ):
            archive.writestr(member_name, frame.to_csv())
# Two tabs: the results table, and a button that downloads the zip archive.
summary_tab, download_tab = st.tabs(["Summarize", "Download"])

with summary_tab:
    if text is not None:
        st.dataframe(df, width=1000)

with download_tab:
    # Snapshot the in-memory archive as bytes for the download widget.
    zip_bytes = buf.getvalue()
    st.download_button(
        label="Download zip file",
        data=zip_bytes,
        file_name="zip file.zip",
        mime="application/zip",
    )