# demonlpapp / app.py
# Import libraries
import io
import re
import time
import zipfile

import nltk
import pandas as pd
import streamlit as st
from nltk.tokenize import word_tokenize
from streamlit_extras.stylable_container import stylable_container
from transformers import pipeline

# Download the tokenizer data that word_tokenize relies on
nltk.download('punkt_tab')
# Title
st.title(":blue[Named Entity Recognition (NER) and Question Answering (QA)]")
st.write("made by [nlpblogs](https://nlpblogs.com/)")
st.write("License [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/deed.en)")
container = st.container(border=True)
container.write("**With this demo app you can tag entities in your text and retrieve answers to your questions.**")
st.divider()
# Text input
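# Clearing the widget's session_state key inside a button callback resets the text area on the next rerun.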
def clear_text():
    st.session_state["text"] = ""

text = st.text_area("Paste your text here and then press **Ctrl + Enter**. The length of your text should not exceed 500 words.", key="text")
st.button("Clear text", on_click=clear_text)
# Word limit
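# Strip punctuation before tokenizing so stray symbols are not counted as words.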
text1 = re.sub(r'[^\w\s]','',text)
tokens = word_tokenize(text1)
st.divider()
number = 500
if text is not None and len(tokens) > number:
    st.warning('The length of your text should not exceed 500 words.')
    st.stop()
# Sidebar
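# stylable_container scopes the CSS below to the widgets rendered inside it,
# which is what gives the DEMO APP button its blue background and red border.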
with st.sidebar:
    with stylable_container(
        key="test_button",
        css_styles="""
            button {
                background-color: blue;
                border: 3px solid red;
                padding: 5px;
                color: white;
            }
            """,
    ):
        st.button("DEMO APP")
    st.write("**Original text**")
    container = st.container(border=True)
    container.write(text)
    st.write("Length", len(tokens))
# NER
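# The token-classification pipeline returns one dict per detected entity;
# aggregation_strategy="max" merges sub-word tokens into whole-entity spans.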
with st.spinner('Wait for it...'):
    time.sleep(5)
    # text_area returns an empty string by default, so only run NER when text was pasted
    if text:
        token_classifier = pipeline(model="huggingface-course/bert-finetuned-ner", aggregation_strategy="max")
        entities = token_classifier(text)
        df = pd.DataFrame(entities)
        properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
        df_styled = df.style.set_properties(**properties)
        st.subheader(":red[Named Entity Recognition (NER)]", divider="red")
        tab1, tab2 = st.tabs(["Entities", "Explanations"])
        with tab1:
            st.table(df_styled)
        with tab2:
            st.write("**PER** Person's name")
            st.write("**LOC** Location name")
            st.write("**ORG** Organization name")
            st.write("**MISC** Miscellaneous")
            st.write("**entity_group** This is the tag that has been assigned to an entity.")
            st.write("**score** This indicates the confidence level that a tag has been assigned to an entity.")
            st.write("**word** This is the entity that has been extracted from your text data.")
            st.write("**start** This is the index of the start of the corresponding entity in your text data.")
            st.write("**end** This is the index of the end of the corresponding entity in your text data.")
# Download
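# Bundle the NER results and a glossary of the tags into a single in-memory zip archive,
# so both CSV files can be served from one download button.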
dfa = pd.DataFrame(
    data={
        'PER': ['Person'],
        'ORG': ['Organization'],
        'LOC': ['Location'],
        'MISC': ['Miscellaneous'],
        'entity_group': ['tag'],
        'score': ['confidence level'],
        'word': ['entity'],
        'start': ['index of the start of the corresponding entity'],
        'end': ['index of the end of the corresponding entity'],
    }
)
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as myzip:
    if text:
        myzip.writestr("Summary of the results.csv", df.to_csv())
        myzip.writestr("Glossary of tags.csv", dfa.to_csv())
with stylable_container(
    key="button",
    css_styles="""
        button {
            background-color: blue;
            border: 3px solid red;
            padding: 5px;
            color: white;
        }
        """,
):
    st.download_button(
        label="Download zip file",
        data=buf.getvalue(),
        file_name="zip file.zip",
        mime="application/zip",
    )
st.divider()
# QA
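# Extractive QA: the model picks the answer span directly from the pasted text, which serves as the context.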
st.subheader(":red[Question Answering (QA)]", divider = "red")
qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
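# Note: the model is reloaded on every Streamlit rerun; a function decorated with st.cache_resource could cache it.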
def clear_question():
    st.session_state["question"] = ""

question = st.text_input("Ask a question:", key="question")
st.button("Clear question", on_click=clear_question)
if st.button("Get Answer"):
answer = qa(question, text)
st.divider()
st.write("Answer:", answer['answer'])
st.write("Score:", answer['score'] * 100)