Spaces:
Sleeping
Sleeping
# Import libraries | |
import streamlit as st | |
from streamlit_extras.stylable_container import stylable_container | |
import nltk | |
nltk.download('punkt_tab') | |
from nltk.tokenize import word_tokenize | |
import re | |
import time | |
import pandas as pd | |
import numpy as np | |
from transformers import pipeline | |
# Title
# Page header: app title, attribution and licence lines, then a bordered
# box holding a one-sentence description, closed off with a divider.
st.title(":blue[Named Entity Recognition (NER) and Question Answering (QA)]")
for credit_line in (
    "made by [nlpblogs](https://nlpblogs.com/)",
    "License [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/deed.en)",
):
    st.write(credit_line)

container = st.container(border=True)
with container:
    st.write("**With this demo app you can tag entities in your text and retrieve answers to your questions.**")

st.divider()
# Text input | |
def clear_text() -> None:
    """Blank out the ``"text"`` entry in Streamlit's session state.

    Intended for use as a button ``on_click`` callback: storing an empty
    string under the ``"text"`` key resets whichever widget is bound to
    that key.
    """
    st.session_state.text = ""
# Maximum number of words accepted in the text area. Defined once so the
# widget label, the check and the warning message cannot drift apart.
MAX_WORDS = 500
number = MAX_WORDS  # original variable name, kept as an alias

# The text area is bound to st.session_state["text"] so the "Clear text"
# callback can reset it.
text = st.text_area(
    f"Paste your text here and then press **Ctrl + Enter**. The length of your text should not exceed {MAX_WORDS} words.",
    key="text",
)
st.button("Clear text", on_click=clear_text)

# Word limit
# Punctuation is stripped before tokenising so that punctuation marks are not
# counted as words.
text1 = re.sub(r"[^\w\s]", "", text)
tokens = word_tokenize(text1)
st.divider()

# No separate None check is needed: re.sub above has already required `text`
# to be a string by the time execution reaches this point.
if len(tokens) > MAX_WORDS:
    st.warning(f"The length of your text should not exceed {MAX_WORDS} words.")
    st.stop()
# Sidebar
# Shows a styled "DEMO APP" button, the text that was entered, and its
# word count.
# NOTE(review): indentation was lost in this copy of the file; the layout
# below assumes every widget in this section lives in the sidebar - confirm.
demo_button_css = """
    button {
        background-color: blue;
        border: 3px solid red;
        padding: 5px;
        color: white;
    }
    """

sidebar = st.sidebar
with sidebar:
    with stylable_container(key="test_button", css_styles=demo_button_css):
        st.button("DEMO APP")

sidebar.write("**Original text**")
container = sidebar.container(border=True)
container.write(text)
sidebar.write("Length", len(tokens))
# NER
@st.cache_resource
def _load_ner_pipeline():
    """Build the named-entity pipeline once and reuse it across script reruns.

    Streamlit re-executes the whole script on each widget interaction; caching
    the pipeline as a resource avoids re-instantiating the model every time.
    """
    return pipeline(
        model="huggingface-course/bert-finetuned-ner",
        aggregation_strategy="max",
    )


# The spinner wraps the real work (model load + inference) rather than a
# fixed sleep, so it is visible exactly as long as the user is waiting.
with st.spinner('Wait for it...'):
    token_classifier = _load_ner_pipeline()
    # Skip inference for blank input. The result is stored under its own name
    # so the word-token list in `tokens` is no longer overwritten.
    entities = token_classifier(text) if text.strip() else []

# `df` is always defined (possibly empty) because the download section
# further down serialises it to CSV.
df = pd.DataFrame(entities)
properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
df_styled = df.style.set_properties(**properties)

st.subheader(":red[Named Entity Recognition (NER)]", divider="red")
tab1, tab2 = st.tabs(["Entities", "Explanations"])
with tab1:
    st.table(df_styled)
with tab2:
    # Legend for the entity tags and for the columns of the results table.
    for explanation in (
        "**PER** Person's name",
        "**LOC** Location name",
        "**ORG** Organization name",
        "**MISC** Miscellaneous",
        "**entity_group** This is the tag that has been assigned to an entity.",
        "**score** This indicates the confidence level that a tag has been assigned to an entity.",
        "**word** This is the entity that has been extracted from your text data.",
        "**start** This is the index of the start of the corresponding entity in your text data.",
        "**end** This is the index of the end of the corresponding entity in your text data",
    ):
        st.write(explanation)
# Download
import io
import zipfile

# One-row glossary describing each tag and each column of the results table;
# shipped in the archive next to the results themselves.
dfa = pd.DataFrame(
    data={
        'PER': ['Person'],
        'ORG': ['Organization'],
        'LOC': ['Location'],
        'MISC': ['Miscellaneous'],
        'entity_group': ['tag'],
        'score': ['confidence level'],
        'word': ['entity'],
        'start': ['index of the start of the corresponding entity'],
        'end': ['index of the end of the corresponding entity'],
    }
)

# Build the archive entirely in memory. Plain write mode is used: the buffer
# is freshly created, so exclusive-create mode adds nothing.
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as myzip:
    myzip.writestr("Summary of the results.csv", df.to_csv())
    myzip.writestr("Glossary of tags.csv", dfa.to_csv())

# The buffer is read only after the ZipFile context has exited: the archive's
# central directory is written on close, so reading earlier would hand the
# user an incomplete zip file.
with stylable_container(
    key="button",
    css_styles="""
    button {
        background-color: blue;
        border: 3px solid red;
        padding: 5px;
        color: white;
    }
    """,
):
    st.download_button(
        label="Download zip file",
        data=buf.getvalue(),
        file_name="zip file.zip",
        mime="application/zip",
    )
st.divider()
# QA
st.subheader(":red[Question Answering (QA)]", divider="red")


@st.cache_resource
def _load_qa_pipeline():
    """Build the question-answering pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on each widget interaction; caching
    the pipeline as a resource avoids re-instantiating the model every time.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


qa = _load_qa_pipeline()
def clear_question() -> None:
    """Blank out the ``"question"`` entry in Streamlit's session state.

    Intended for use as a button ``on_click`` callback: storing an empty
    string under the ``"question"`` key resets whichever widget is bound to
    that key.
    """
    st.session_state.question = ""
# The input is bound to st.session_state["question"] so the "Clear question"
# callback can reset it.
question = st.text_input("Ask a question:", key="question")
st.button("Clear question", on_click=clear_question)

if st.button("Get Answer"):
    if not question.strip() or not text.strip():
        # NOTE(review): the question-answering pipeline is expected to reject
        # an empty question or context with an error; warn instead of letting
        # the app crash.
        st.warning("Please enter both a text and a question before requesting an answer.")
    else:
        # Keyword arguments make the question/context roles explicit.
        answer = qa(question=question, context=text)
        st.divider()
        st.write("Answer:", answer['answer'])
        # The raw score is scaled by 100 to read as a percentage.
        st.write("Score:", answer['score'] * 100)