nlpblogs committed on
Commit b00a147 · verified · 1 Parent(s): aff8600

Create app.py

Files changed (1)
  1. app.py +168 -0
app.py ADDED
@@ -0,0 +1,168 @@
+ import streamlit as st
+ import pandas as pd
+ from streamlit_extras.stylable_container import stylable_container
+ import time
+ import zipfile
+ import io
+ import re
+ import nltk
+ nltk.download('punkt_tab')
+ from nltk.tokenize import word_tokenize
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
+ from transformers import pipeline
+
+
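+ # Sidebar: a styled demo button and a checkbox glossary of the NER tags.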
+ with st.sidebar:
+     with stylable_container(
+         key="test_button",
+         css_styles="""
+             button {
+                 background-color: #0000ff;
+                 border: none;
+                 color: white;
+             }
+             """,
+     ):
+         st.button("DEMO APP")
+
+     st.subheader("Glossary of tags", divider="red")
+
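+     # Each checkbox reveals a plain-language definition of its tag.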
+     per = st.checkbox("PER")
+     if per:
+         st.write("Person's name")
+
+     org = st.checkbox("ORG")
+     if org:
+         st.write("Organization")
+
+     loc = st.checkbox("LOC")
+     if loc:
+         st.write("Location")
+
+     b_per = st.checkbox("B-PER")
+     if b_per:
+         st.write("Beginning of a person's name right after another person's name")
+
+     b_org = st.checkbox("B-ORG")
+     if b_org:
+         st.write("Beginning of an organization right after another organization")
+
+     b_loc = st.checkbox("B-LOC")
+     if b_loc:
+         st.write("Beginning of a location right after another location")
+
+     o = st.checkbox("O")
+     if o:
+         st.write("Outside of a named entity")
+
+
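+ # Main panel: title, text input and results.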
+ st.subheader(":blue[AI Entity Extractor]")
+
+ st.divider()
+
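+ # The input widget stores its value under the "text" session-state key,
+ # so the "Clear text" button below can reset it through a callback.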
+ def clear_text():
+     # Callback for the "Clear text" button: reset the input widget's state.
+     st.session_state["text"] = ""
+
+ text = st.text_input("Paste your text here and then press **enter**. The length of your text should not exceed 2000 words.", key="text")
+ st.button("Clear text", on_click=clear_text)
+ st.write(text)
+
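+ # Rough word count: strip punctuation, then tokenize with NLTK.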
+ text1 = re.sub(r'[^\w\s]', '', text)
+ tokens = word_tokenize(text1)
+ st.write("Length", len(tokens))
+ st.divider()
+
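+ # Enforce the 2000-word limit; st.stop() halts this script run early.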
+ number = 2000
+
+ if len(tokens) > number:
+     st.warning('The length of your text should not exceed 2000 words.')
+     st.stop()
+
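+ # Run the NER pipeline. aggregation_strategy="simple" merges subword pieces
+ # into whole entities, each with an entity_group label (e.g. PER, ORG, LOC).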
+ @st.cache_resource
+ def load_token_classifier():
+     # Cache the model so it is loaded once, not on every Streamlit rerun.
+     return pipeline(model="Davlan/bert-base-multilingual-cased-ner-hrl", aggregation_strategy="simple")
+
+ if text:
+     token_classifier = load_token_classifier()
+     entities = token_classifier(text)
+
+     df = pd.DataFrame(entities)
+     # Drop stray subword fragments such as "##s" left over from tokenization.
+     df = df.drop(df[df['word'] == '##s'].index)
+
+ dfa = pd.DataFrame(
+     data={
+         'PER': ['Person'],
+         'ORG': ['Organization'],
+         'LOC': ['Location'],
+         'B-PER': ['Beginning of a person's name right after another person's name'],
+         'B-ORG': ['Beginning of an organization right after another organization'],
+         'B-LOC': ['Beginning of a location right after another location'],
+         'O': ['Outside of a named entity']
+     }
+ )
+
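+ # Bundle the results table and the tag glossary into an in-memory zip.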
+ buf = io.BytesIO()
+
+ with zipfile.ZipFile(buf, "w") as myzip:
+     if text:
+         myzip.writestr("Summary of the results.csv", df.to_csv())
+     myzip.writestr("Glossary of tags.csv", dfa.to_csv())
+
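+ # Two tabs: one to inspect the results, one to download them.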
+ tab1, tab2 = st.tabs(["Summarize", "Download"])
+
+ with tab1:
+     if text:
+         st.dataframe(df, width=1000)
+
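+ # Serve the zip built above as a single download.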
+ with tab2:
+     st.download_button(
+         label="Download zip file",
+         data=buf.getvalue(),
+         file_name="zip file.zip",
+         mime="application/zip",
+     )