"""Streamlit demo: multilingual named-entity extraction.

The app takes pasted text, enforces a 2000-word limit (counted with NLTK's
word tokenizer after stripping punctuation), runs a HuggingFace BERT NER
pipeline over it, shows the entities in a table, and offers the results plus
a tag glossary as a downloadable zip archive.
"""

import io
import re
import time  # NOTE(review): unused here — kept in case another chunk of this file needs it
import zipfile

import nltk
import pandas as pd
import streamlit as st
from nltk.tokenize import word_tokenize
from streamlit_extras.stylable_container import stylable_container
from transformers import (  # NOTE(review): AutoTokenizer/AutoModel are unused; pipeline handles loading
    AutoModelForTokenClassification,
    AutoTokenizer,
    pipeline,
)

# Maximum number of word tokens accepted from the user.
MAX_WORDS = 2000


@st.cache_resource
def _ensure_punkt() -> None:
    """Download the NLTK tokenizer data once per process instead of every rerun."""
    nltk.download("punkt_tab", quiet=True)


@st.cache_resource
def _load_ner_pipeline():
    """Load the multilingual NER pipeline once and reuse it across reruns."""
    return pipeline(
        model="Davlan/bert-base-multilingual-cased-ner-hrl",
        aggregation_strategy="simple",
    )


def clear_text() -> None:
    """Callback for the "Clear text" button: reset the text-input widget state."""
    st.session_state["text"] = ""


_ensure_punkt()

with st.sidebar:
    with stylable_container(
        key="test_button",
        css_styles="""
            button {
                background-color: #0000ff;
                border: none;
                color: white;
            }
            """,
    ):
        st.button("DEMO APP")

    st.subheader("Glossary of tags", divider="red")
    # One checkbox per NER tag; ticking a box reveals that tag's meaning.
    if st.checkbox("I"):
        st.write("Person's name")
    if st.checkbox("ORG"):
        st.write("Organization")
    if st.checkbox("LOC"):
        st.write("Location")
    if st.checkbox("B-PER"):
        st.write("Beginning of a person’s name right after another person’s name")
    if st.checkbox("B-ORG"):
        st.write("Beginning of an organisation right after another organization")
    if st.checkbox("B-LOC"):
        st.write("Beginning of a location right after another location")
    if st.checkbox("O"):
        st.write("Outside of a named entity")

st.subheader(":blue[AI Entity Extractor]")
st.divider()

text = st.text_input(
    "Paste your text here and then press **enter**. "
    "The length of your text should not exceed 2000 words.",
    key="text",
)
st.button("Clear text", on_click=clear_text)
st.write(text)

# Strip punctuation before counting so the limit applies to words only.
tokens = word_tokenize(re.sub(r"[^\w\s]", "", text))
st.write("Length", len(tokens))
st.divider()

if len(tokens) > MAX_WORDS:
    st.warning("The length of your text should not exceed 2000 words.")
    st.stop()

# st.text_input returns a string ("" when empty), never None, so test
# truthiness — this also avoids running the model on empty input.
if text:
    token_classifier = _load_ner_pipeline()
    df = pd.DataFrame(token_classifier(text))
    # Drop stray word-piece fragments left over from sub-token aggregation.
    df = df.drop(df[df["word"] == "##s"].index)
else:
    df = pd.DataFrame()

# Glossary shipped alongside the results inside the downloadable zip.
dfa = pd.DataFrame(
    data={
        "I": ["Person"],
        "ORG": ["Organization"],
        "LOC": ["Location"],
        "B-PER": ["Beginning of a person’s name right after another person’s name"],
        "B-ORG": ["Beginning of an organisation right after another organization "],
        "B-LOC": ["Beginning of a location right after another location"],
        "O": ["Outside of a named entity "],
    }
)

# Assemble the zip in memory; "w" (not exclusive "x") since the buffer is
# freshly created on every rerun.
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as myzip:
    myzip.writestr("Glossary of tags.csv", dfa.to_csv())
    if text:
        myzip.writestr("Summary of the results.csv", df.to_csv())

tab1, tab2 = st.tabs(["Summarize", "Download"])
with tab1:
    if text:
        st.dataframe(df, width=1000)
with tab2:
    st.download_button(
        label="Download zip file",
        data=buf.getvalue(),
        file_name="zip file.zip",
        mime="application/zip",
    )