# Named Entity Recognition (NER) and Question Answering (QA) demo app.
# This section: imports, page header, text input, word-count limit,
# sidebar, and the NER results table.

import re

import nltk
import numpy as np
import pandas as pd
import streamlit as st
from nltk.tokenize import word_tokenize
from streamlit_extras.stylable_container import stylable_container
from transformers import pipeline

# Tokenizer data required by word_tokenize; download is a no-op when cached.
nltk.download('punkt_tab')

# --- Page header ---
st.title(":blue[Named Entity Recognition (NER) and Question Answering (QA)]")
st.write("made by [nlpblogs](https://nlpblogs.com/)")
st.write("License [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/deed.en)")
container = st.container(border=True)
container.write("**With this demo app you can tag entities in your text and retrieve answers to your questions.**")
st.divider()


# --- Text input ---
def clear_text():
    """Callback: blank the text area via session state."""
    st.session_state["text"] = ""


text = st.text_area(
    "Paste your text here and then press **Ctrl + Enter**. The length of your text should not exceed 500 words.",
    key="text",
)
st.button("Clear text", on_click=clear_text)

# --- Word limit ---
# Strip punctuation before tokenizing so the limit counts words only.
cleaned = re.sub(r'[^\w\s]', '', text)
tokens = word_tokenize(cleaned)
st.divider()
MAX_WORDS = 500
# NOTE: st.text_area returns a string (never None), so only the length
# check is needed; the original `text is not None` guard was dead code.
if len(tokens) > MAX_WORDS:
    st.warning('The length of your text should not exceed 500 words.')
    st.stop()

# --- Sidebar ---
with st.sidebar:
    with stylable_container(
        key="test_button",
        css_styles="""
            button {
                background-color: blue;
                border: 3px solid red;
                padding: 5px;
                color: white;
            }
            """,
    ):
        st.button("DEMO APP")
    st.write("**Original text**")
    container = st.container(border=True)
    container.write(text)
    st.write("Length", len(tokens))


# --- NER ---
@st.cache_resource
def load_ner_model():
    """Load the NER pipeline once per session instead of on every rerun."""
    return pipeline(
        model="huggingface-course/bert-finetuned-ner",
        aggregation_strategy="max",
    )


with st.spinner('Wait for it...'):
    token_classifier = load_ner_model()
    # Skip the model call on empty input, but keep `df` always defined
    # because the download section below serializes it to CSV.
    entities = token_classifier(text) if text else []
    df = pd.DataFrame(entities)
    properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
    df_styled = df.style.set_properties(**properties)
    st.subheader(":red[Named Entity Recognition (NER)]", divider="red")
    tab1, tab2 = st.tabs(["Entities", "Explanations"])
    with tab1:
        st.table(df_styled)
with tab2:
    # Glossary of the NER output columns shown in the Entities tab.
    st.write("**PER** Person's name")
    st.write("**LOC** Location name")
    st.write("**ORG** Organization name")
    st.write("**MISC** Miscellaneous")
    st.write("**entity_group** This is the tag that has been assigned to an entity.")
    st.write("**score** This indicates the confidence level that a tag has been assigned to an entity.")
    st.write("**word** This is the entity that has been extracted from your text data.")
    st.write("**start** This is the index of the start of the corresponding entity in your text data.")
    st.write("**end** This is the index of the end of the corresponding entity in your text data")

# --- Download results as a zip of two CSVs ---
import io
import zipfile

# One-row glossary frame bundled alongside the results.
dfa = pd.DataFrame(
    data={
        'PER': ['Person'],
        'ORG': ['Organization'],
        'LOC': ['Location'],
        'MISC': ['Miscellaneous'],
        'entity_group': ['tag'],
        'score': ['confidence level'],
        'word': ['entity'],
        'start': ['index of the start of the corresponding entity'],
        'end': ['index of the end of the corresponding entity'],
    }
)
buf = io.BytesIO()
# "w" (write) mode: the buffer is always fresh, so exclusive-create "x"
# gains nothing here. `df` is defined unconditionally above, so both
# entries can be written without the dead `text is not None` guard.
with zipfile.ZipFile(buf, "w") as myzip:
    myzip.writestr("Summary of the results.csv", df.to_csv())
    myzip.writestr("Glossary of tags.csv", dfa.to_csv())

with stylable_container(
    key="button",
    css_styles="""
        button {
            background-color: blue;
            border: 3px solid red;
            padding: 5px;
            color: white;
        }
        """,
):
    st.download_button(
        label="Download zip file",
        data=buf.getvalue(),
        file_name="zip file.zip",
        mime="application/zip",
    )
st.divider()

# --- QA ---
st.subheader(":red[Question Answering (QA)]", divider="red")


@st.cache_resource
def load_qa_model():
    """Load the QA pipeline once per session instead of on every rerun."""
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


qa = load_qa_model()


def clear_question():
    """Callback: blank the question input via session state."""
    st.session_state["question"] = ""


question = st.text_input("Ask a question:", key="question")
st.button("Clear question", on_click=clear_question)

if st.button("Get Answer"):
    # The QA pipeline raises on empty question/context; guard first.
    if question and text:
        # Keyword arguments are the documented calling convention for the
        # question-answering pipeline (question=..., context=...).
        answer = qa(question=question, context=text)
        st.divider()
        st.write("Answer:", answer['answer'])
        st.write("Score:", answer['score'] * 100)
    else:
        st.warning("Please paste some text and ask a question first.")