Spaces:

nlpblogs
/

demonlpapp

Sleeping

demonlpapp / app.py

Maria Tsilimos

Create app.py

566200c unverified about 1 month ago

4.57 kB

	# Import libraries
	import streamlit as st
	from streamlit_extras.stylable_container import stylable_container
	import nltk
	nltk.download('punkt_tab')
	from nltk.tokenize import word_tokenize
	import re
	import time
	import pandas as pd
	import numpy as np
	from transformers import pipeline




	# Title
	st.title(":blue[Named Entity Recognition (NER) and Question Answering (QA)]")

	# Attribution and license lines rendered directly under the title.
	for credit_line in (
	    "made by [nlpblogs](https://nlpblogs.com/)",
	    "License [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/deed.en)",
	):
	    st.write(credit_line)

	# Bordered box holding the one-sentence description of the app.
	with st.container(border=True):
	    st.write("With this demo app you can tag entities in your text and retrieve answers to your questions.")

	st.divider()

	# Text input
	def clear_text():
	    """Blank the text area by resetting its entry in the session state."""
	    st.session_state["text"] = ""


	# The widget key matches the session-state entry that clear_text() resets.
	text = st.text_area(
	    label="Paste your text here and then press Ctrl + Enter. The length of your text should not exceed 500 words.",
	    key="text",
	)
	st.button(label="Clear text", on_click=clear_text)



	# Word limit
	max_words = 500

	# Drop every character that is neither a word character nor whitespace, so
	# punctuation marks are not counted as tokens.
	tokens = word_tokenize(re.sub(r'[^\w\s]','',text))

	st.divider()

	# Halt the script run when the submitted text is longer than the limit.
	too_long = text is not None and len(tokens) > max_words
	if too_long:
	    st.warning('The length of your text should not exceed 500 words.')
	    st.stop()



	# Sidebar
	demo_button_css = """
	button {
	background-color: blue;
	border: 3px solid red;
	padding: 5px;
	color: white;
	}
	"""

	with st.sidebar:
	    # Styled button at the top of the sidebar; its return value is ignored.
	    with stylable_container(key="test_button", css_styles=demo_button_css):
	        st.button("DEMO APP")

	    # Echo the submitted text inside a bordered box.
	    st.write("Original text")
	    container = st.container(border=True)
	    container.write(text)

	    # Number of tokens in the punctuation-stripped text.
	    st.write("Length", len(tokens))

	# NER
	@st.cache_resource(show_spinner=False)
	def load_token_classifier():
	    """Build the NER pipeline once and reuse it across Streamlit reruns.

	    Streamlit re-executes the whole script on every widget interaction, so
	    an uncached pipeline would be re-created each time.

	    Returns:
	        The token-classification pipeline for
	        "huggingface-course/bert-finetuned-ner" with the "max"
	        aggregation strategy.
	    """
	    return pipeline(
	        model="huggingface-course/bert-finetuned-ner",
	        aggregation_strategy="max",
	    )


	# Show the spinner while the model is actually working instead of during a
	# fixed artificial delay.
	with st.spinner('Wait for it...'):
	    # Skip inference on blank input; the empty list still produces a
	    # DataFrame, so the entity table and the CSV export keep working.
	    entities = load_token_classifier()(text) if text and text.strip() else []

	# One row per recognised entity.
	df = pd.DataFrame(entities)

	# Cell styling applied to every cell of the entity table.
	properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
	df_styled = df.style.set_properties(**properties)





	st.subheader(":red[Named Entity Recognition (NER)]", divider = "red")

	entities_tab, explanations_tab = st.tabs(["Entities", "Explanations"])

	# Styled table of the recognised entities.
	with entities_tab:
	    st.table(df_styled)

	# Legend for the tag names and for the columns of the entity table.
	with explanations_tab:
	    for explanation in (
	        "PER Person's name",
	        "LOC Location name",
	        "ORG Organization name",
	        "MISC Miscellaneous",
	        "entity_group This is the tag that has been assigned to an entity.",
	        "score This indicates the confidence level that a tag has been assigned to an entity.",
	        "word This is the entity that has been extracted from your text data.",
	        "start This is the index of the start of the corresponding entity in your text data.",
	        "end This is the index of the end of the corresponding entity in your text data",
	    ):
	        st.write(explanation)



	# Download

	import zipfile
	import io

	# Glossary shipped alongside the results: one column per tag or table
	# column name, with a single row holding its meaning.
	glossary = {
	    'PER': 'Person',
	    'ORG': 'Organization',
	    'LOC': 'Location',
	    'MISC': 'Miscellaneous',
	    'entity_group': 'tag',
	    'score': 'confidence level',
	    'word': 'entity',
	    'start': 'index of the start of the corresponding entity',
	    'end': 'index of the end of the corresponding entity',
	}
	dfa = pd.DataFrame(data={name: [meaning] for name, meaning in glossary.items()})

	# Assemble the zip archive in memory; `buf` is read by the download button.
	buf = io.BytesIO()
	with zipfile.ZipFile(buf, "x") as archive:
	    if text is not None:
	        archive.writestr("Summary of the results.csv", df.to_csv())
	    archive.writestr("Glossary of tags.csv", dfa.to_csv())




	download_button_css = """
	button {
	background-color: blue;
	border: 3px solid red;
	padding: 5px;
	color: white;
	}
	"""

	# Styled button that serves the bytes of the in-memory zip archive.
	with stylable_container(key="button", css_styles=download_button_css):
	    st.download_button(
	        label="Download zip file",
	        data=buf.getvalue(),
	        file_name="zip file.zip",
	        mime="application/zip",
	    )




	st.divider()

	# QA

	st.subheader(":red[Question Answering (QA)]", divider = "red")


	@st.cache_resource
	def load_qa_pipeline():
	    """Build the question-answering pipeline once and reuse it.

	    Streamlit re-executes the whole script on every widget interaction, so
	    an uncached pipeline would be re-created on each rerun.

	    Returns:
	        The "question-answering" pipeline for "deepset/roberta-base-squad2".
	    """
	    return pipeline("question-answering", model="deepset/roberta-base-squad2")


	qa = load_qa_pipeline()





	def clear_question():
	    """Blank the question box by resetting its entry in the session state."""
	    st.session_state["question"] = ""


	# The widget key matches the session-state entry that clear_question() resets.
	question = st.text_input(label="Ask a question:", key="question")
	st.button(label="Clear question", on_click=clear_question)




	if st.button("Get Answer"):
	    # The question-answering pipeline rejects an empty question or context,
	    # so validate both and show a warning instead of a traceback.
	    if not (question or "").strip():
	        st.warning("Please enter a question.")
	    elif not (text or "").strip():
	        st.warning("Please paste some text before asking a question.")
	    else:
	        answer = qa(question=question, context=text)
	        st.divider()
	        st.write("Answer:", answer['answer'])
	        # The pipeline score is multiplied by 100 for display.
	        st.write("Score:", answer['score'] * 100)