"""
## App: NLP App with Streamlit
Credits: Streamlit Team,Marc Skov Madsen(For Awesome-streamlit gallery)
Description
This is a Natural Language Processing(NLP) Based App useful for basic NLP concepts such as follows;
+ Tokenization & Lemmatization using Spacy
+ Named Entity Recognition(NER) using SpaCy
+ Sentiment Analysis using TextBlob
+ Document/Text Summarization using Gensim/T5
This is built with Streamlit Framework, an awesome framework for building ML and NLP tools.
Purpose
To perform basic and useful NLP task with Streamlit, Spacy, Textblob and Gensim
"""
# Core Pkgs
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelWithLMHead  # deprecated alias for AutoModelForSeq2SeqLM in newer transformers
# NLP Pkgs
from textblob import TextBlob
import spacy
from gensim.summarization import summarize  # requires gensim < 4.0, which still ships this module
# OCR Pkgs
import cv2
import pytesseract
from PIL import Image

# Path to the Tesseract binary (Windows default install path; on Linux/macOS the
# binary is usually found on PATH and this line can be removed or adjusted)
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load the T5 tokenizer and model once at import time so Streamlit reruns reuse them
tokenizer = AutoTokenizer.from_pretrained('t5-base')
model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
# Function to analyze tokens and lemmas
# (st.cache is the caching API of older Streamlit releases; newer releases use st.cache_data)
@st.cache
def text_analyzer(my_text):
    nlp = spacy.load('en_core_web_sm')
    docx = nlp(my_text)
    allData = ['"Token":{},\n"Lemma":{}'.format(token.text, token.lemma_) for token in docx]
    return allData
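
# Illustrative usage (assumed behavior with spaCy's en_core_web_sm model):
#   text_analyzer("dogs ran") -> ['"Token":dogs,\n"Lemma":dog', '"Token":ran,\n"Lemma":run']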
# Function for extracting named entities
@st.cache
def entity_analyzer(my_text):
    nlp = spacy.load('en_core_web_sm')
    docx = nlp(my_text)
    tokens = [token.text for token in docx]
    entities = [(entity.text, entity.label_) for entity in docx.ents]
    allData = ['"Token":{},\n"Entities":{}'.format(tokens, entities)]
    return allData
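
# Illustrative usage (assumed behavior with spaCy's en_core_web_sm model):
#   entity_analyzer("Google was founded in 1998") would report entities such as
#   ('Google', 'ORG') and ('1998', 'DATE')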
def main():
    """ NLP Based App with Streamlit """
    # Title
    st.title("Streamlit NLP App")
    st.markdown("""
    #### Description
    + This is a Natural Language Processing (NLP) app for basic NLP tasks:
      NER, Sentiment Analysis, Spell Correction and Summarization.
    """)
    # Entity Extraction
    if st.checkbox("Show Named Entities"):
        st.subheader("Analyze Your Text")
        message = st.text_area("Enter your Text", "Type Here ...")
        if st.button("Extract"):
            entity_result = entity_analyzer(message)
            st.json(entity_result)
    # Sentiment Analysis
    elif st.checkbox("Show Sentiment Analysis"):
        st.subheader("Analyze Your Text")
        message = st.text_area("Enter your Text", "Type Here ...")
        if st.button("Analyze"):
            blob = TextBlob(message)
            result_sentiment = blob.sentiment  # (polarity, subjectivity) namedtuple
            st.success(result_sentiment)
    # Spell Correction
    elif st.checkbox("Spell Correction"):
        st.subheader("Correct Your Text")
        message = st.text_area("Enter your Text", "Type Here ...")
        if st.button("Correct"):
            st.text("Using TextBlob ...")
            st.success(TextBlob(message).correct())
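
    # Illustrative usage (example adapted from the TextBlob docs):
    #   TextBlob("I havv goood speling!").correct() -> "I have good spelling!"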
    def change_photo_state():
        st.session_state["photo"] = "done"

    st.subheader("Summary Section: feed in an image or text!")
    camera_photo = st.camera_input("Take a photo", on_change=change_photo_state)
    uploaded_photo = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'], on_change=change_photo_state)
    message = st.text_input("Or, drop your text here!")
    if "photo" not in st.session_state:
        st.session_state["photo"] = "not done"
if st.session_state["photo"]=="done" or message:
if uploaded_photo:
img = Image.open(uploaded_photo)
img = img.save("img.png")
img = cv2.imread("img.png")
text = pytesseract.image_to_string(img)
st.success(text)
if camera_photo:
img = Image.open(camera_photo)
img = img.save("img.png")
img = cv2.imread("img.png")
text = pytesseract.image_to_string(img)
st.success(text)
if uploaded_photo==None and camera_photo==None:
#our_image=load_image("image.jpg")
#img = cv2.imread("scholarly_text.jpg")
text = message
        # Summarization
        if st.checkbox("Show Text Summarization with Gensim"):
            st.subheader("Summarize Your Text")
            st.text("Using Gensim Summarizer ...")
            # Note: gensim's extractive summarizer raises ValueError on very short
            # inputs (it needs more than one sentence)
            summary_result = summarize(text)
            st.success(summary_result)
elif st.checkbox("Show Text Summarization T5"):
st.subheader("Summarize Your Text")
#message = st.text_area("Enter the Text","Type please ..")
st.text("Using Google T5 Transformer ..")
inputs = tokenizer.encode("summarize: " + text,
return_tensors='pt',
max_length=512,
truncation=True)
summary_ids = model.generate(inputs, max_length=150, min_length=80, length_penalty=5., num_beams=2)
summary = tokenizer.decode(summary_ids[0])
st.success(summary)
st.sidebar.subheader("About App")
st.sidebar.subheader("By")
st.sidebar.text("Soumen Sarker")
if __name__ == '__main__':
main()
|