File size: 4,565 Bytes
566200c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# Import libraries
import streamlit as st
from streamlit_extras.stylable_container import stylable_container
import nltk
nltk.download('punkt_tab')
from nltk.tokenize import word_tokenize
import re
import time
import pandas as pd
import numpy as np
from transformers import pipeline 




# Page header: title, attribution, license, and a short description box.
st.title(":blue[Named Entity Recognition (NER) and Question Answering (QA)]")
st.write("made by [nlpblogs](https://nlpblogs.com/)")
st.write("License [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/deed.en)")

intro_box = st.container(border=True)
intro_box.write("**With this demo app you can tag entities in your text and retrieve answers to your questions.**")

st.divider()

# Text input: a session-state-backed text area with a reset button.
def clear_text():
    """Callback for the "Clear text" button: blank out the text area widget."""
    st.session_state["text"] = ""


text = st.text_area(
    "Paste your text here and then press **Ctrl + Enter**. The length of your text should not exceed 500 words.",
    key="text",
)
st.button("Clear text", on_click=clear_text)



# Word limit: strip punctuation, tokenize, and halt the app when the text
# exceeds the allowed word count.
MAX_WORDS = 500

text1 = re.sub(r'[^\w\s]', '', text)  # drop punctuation so it is not counted as words
tokens = word_tokenize(text1)

st.divider()

number = MAX_WORDS  # kept under the original name for any later references

# NOTE: st.text_area always returns a str (never None), so the old
# `text is not None` guard was dead code and has been removed.
if len(tokens) > number:
    st.warning('The length of your text should not exceed 500 words.')
    st.stop()



# Sidebar: echo the submitted text and its word count inside a styled panel.
with st.sidebar:
    with stylable_container(
        key="test_button",
        css_styles="""
        button { 
            background-color: blue;
            border: 3px solid red;
            padding: 5px;
            color: white;
        }
        """,
    ):
        st.button("DEMO APP")
        st.write("**Original text**")
        original_box = st.container(border=True)
        original_box.write(text)
        st.write("Length", len(tokens))
    
# NER: run the token-classification model over the text and collect the
# detected entities into a DataFrame for display and download.

@st.cache_resource
def _load_ner_pipeline():
    """Load the NER model once and reuse it across Streamlit reruns.

    Without caching, the model was re-instantiated (and potentially
    re-downloaded) on every widget interaction.
    """
    return pipeline(model="huggingface-course/bert-finetuned-ner", aggregation_strategy="max")


df = pd.DataFrame()  # fallback so later uses of `df` can never hit a NameError
with st.spinner('Wait for it...'):
    time.sleep(5)  # original demo delay, kept so the spinner stays visible
    if text is not None:
        token_classifier = _load_ner_pipeline()
        # Use a distinct name: the original shadowed the word-count `tokens`.
        entities = token_classifier(text)
        df = pd.DataFrame(entities)

properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
df_styled = df.style.set_properties(**properties)




        
st.subheader(":red[Named Entity Recognition (NER)]", divider="red")

tab1, tab2 = st.tabs(["Entities", "Explanations"])

with tab1:
    # Styled table of the extracted entities.
    st.table(df_styled)

with tab2:
    # Glossary of entity tags and of the result-table columns.
    for glossary_line in (
        "**PER** Person's name",
        "**LOC** Location name",
        "**ORG** Organization name",
        "**MISC** Miscellaneous",
        "**entity_group** This is the tag that has been assigned to an entity.",
        "**score** This indicates the confidence level that a tag has been assigned to an entity.",
        "**word** This is the entity that has been extracted from your text data.",
        "**start** This is the index of the start of the corresponding entity in your text data.",
        "**end** This is the index of the end of the corresponding entity in your text data",
    ):
        st.write(glossary_line)



# Download: bundle the NER results plus a glossary of tags into an
# in-memory zip archive.

import zipfile
import io

# One-row glossary DataFrame explaining each tag / result column in the export.
dfa = pd.DataFrame(
    data={
        'PER': ['Person'],
        'ORG': ['Organization'],
        'LOC': ['Location'],
        'MISC': ['Miscellaneous'],
        'entity_group': ['tag'],
        'score': ['confidence level'],
        'word': ['entity'],
        'start': ['index of the start of the corresponding entity'],
        'end': ['index of the end of the corresponding entity'],
    }
)

buf = io.BytesIO()

# Mode "w" (was "x"): exclusive-creation semantics are meaningless for a
# fresh in-memory buffer; "w" is the correct mode for writing a new archive.
with zipfile.ZipFile(buf, "w") as myzip:
    if text is not None:  # st.text_area returns str, so this always holds
        myzip.writestr("Summary of the results.csv", df.to_csv())
        myzip.writestr("Glossary of tags.csv", dfa.to_csv())
  



# Offer the zipped CSV results for download via a custom-styled button.
with stylable_container(
    key="button",
    css_styles="""
        button { 
            background-color: blue;
            border: 3px solid red;
            padding: 5px;
            color: white;
        }
        """,
):
    st.download_button(
        label="Download zip file",
        data=buf.getvalue(),
        file_name="zip file.zip",
        mime="application/zip",
    )
              


    
st.divider()

# QA
st.subheader(":red[Question Answering (QA)]", divider="red")


@st.cache_resource
def _load_qa_pipeline():
    """Load the QA model once and reuse it across Streamlit reruns.

    Without caching, the model was re-instantiated (and potentially
    re-downloaded) on every widget interaction, e.g. each keystroke.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


qa = _load_qa_pipeline()





def clear_question():
    """Callback for the "Clear question" button: blank out the question box."""
    st.session_state["question"] = ""


question = st.text_input("Ask a question:", key="question")
st.button("Clear question", on_click=clear_question)




# Run QA on demand. An empty question would make the pipeline raise an
# unhandled exception, so guard for it and show a friendly warning instead.
if st.button("Get Answer"):
    if not question.strip():
        st.warning('Please ask a question first.')
    else:
        # Keyword arguments make the question/context roles unambiguous.
        answer = qa(question=question, context=text)
        st.divider()
        st.write("Answer:", answer['answer'])
        st.write("Score:", answer['score'] * 100)