|
import matplotlib as mpl |
|
import pandas as pd |
|
import plotly.express as px |
|
import plotly.graph_objects as go |
|
import numpy as np |
|
import re |
|
import streamlit as st |
|
|
|
from io import StringIO, BytesIO |
|
from transformers import pipeline |
|
|
|
# Use the full browser width. The ``beta_``-prefixed call is a fallback for
# Streamlit builds that do not expose ``set_page_config``; a missing attribute
# is the only failure that fallback can fix, so nothing broader is caught
# (a bare ``except`` would also hide genuine Streamlit API errors).
try:
    st.set_page_config(layout="wide")
except AttributeError:
    st.beta_set_page_config(layout="wide")
|
|
|
# Page heading.
st.title("Sentiment Structure Visualizer")

# Free-text input; the analysis further down runs on whatever is pasted here.
user_input = st.text_area(label="Paste English text here", height=200)

# Horizontal rule between the input area and the results.
st.markdown("\n----------------------------\n")
|
|
|
def _load_sentiment_pipeline():
    """Build the Hugging Face sentiment-analysis pipeline.

    Returns:
        A callable pipeline that maps a list of strings to a list of
        ``{"label": ..., "score": ...}`` dicts.
    """
    return pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
        tokenizer="distilbert-base-uncased-finetuned-sst-2-english",
    )


# Streamlit re-executes this script on every user interaction. Where the
# installed Streamlit offers ``st.cache_resource``, wrap the loader so the
# model is built once and reused across reruns; otherwise fall back to the
# plain (uncached) load, which is what the script did before.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_sentiment_pipeline = _cache_resource(_load_sentiment_pipeline)

sentiment = _load_sentiment_pipeline()
|
|
|
def clean_text(text):
    """Normalise a text fragment before it is handed to the sentiment model.

    Steps, in order: drop every non-ASCII character, lower-case, remove
    punctuation (anything that is neither a word character nor whitespace),
    then collapse each run of whitespace to one space and trim both ends.

    Args:
        text: The raw fragment as a ``str``.

    Returns:
        The cleaned string. It is empty when the fragment contained nothing
        but whitespace, punctuation, or non-ASCII characters.
    """
    text = text.encode("ascii", errors="ignore").decode("ascii")
    text = text.lower()
    text = re.sub(r"[^\w\s]", "", text)
    # A single pass over *all* whitespace replaces the separate newline / tab /
    # space substitutions, and also normalises carriage returns from text
    # pasted with Windows line endings, which previously stayed in the output.
    return re.sub(r"\s+", " ", text).strip()
|
|
|
# Run the analysis only when the text area holds something other than
# whitespace.
if user_input.strip():
    with st.spinner("..."):
        # One row per "."-delimited fragment. The default integer index is the
        # fragment's position in the input and is used as the chart's x-axis.
        input_text = pd.DataFrame({"Paras": user_input.split(".")})
        input_text["Clean_Text"] = input_text["Paras"].map(clean_text)

        # Drop fragments that are empty once cleaned (e.g. the piece after the
        # final ".") *before* inference, so the model is never asked to score
        # an empty string. ``.copy()`` makes the filtered frame independent so
        # the column assignments below do not raise SettingWithCopyWarning.
        input_text = input_text[input_text["Clean_Text"].astype(bool)].copy()

        if input_text.empty:
            st.warning("No sentences found to analyze.")
        else:
            corpus = input_text["Clean_Text"].tolist()

            # The pipeline returns one {"label", "score"} dict per input string.
            input_text["Sentiment"] = sentiment(corpus)
            input_text["Sentiment_Label"] = [
                result.get("label") for result in input_text["Sentiment"]
            ]
            input_text["Sentiment_Score"] = [
                result.get("score") for result in input_text["Sentiment"]
            ]

            cols = ["Paras", "Sentiment_Label", "Sentiment_Score"]
            df = input_text[cols].copy()

            # Turn the score into a signed value: negative sentences get a
            # negative score so they can be summed and coloured against
            # positive ones.
            df["Sentiment_Score"] = np.where(
                df["Sentiment_Label"] == "NEGATIVE",
                -df["Sentiment_Score"],
                df["Sentiment_Score"],
            ).round(6)

            overall_sentiment_score = df["Sentiment_Score"].sum().round(3)
            sentiment_count = df["Sentiment_Label"].value_counts().to_string()

            fig = go.Figure(
                data=go.Heatmap(
                    z=df["Sentiment_Score"],
                    x=df.index,
                    y=df["Sentiment_Label"],
                    colorscale=px.colors.sequential.RdBu,
                    # Signed scores lie in [-1, 1]; fixing the colour range
                    # keeps the midpoint of the diverging scale at zero even
                    # when every sentence has the same polarity.
                    zmin=-1,
                    zmax=1,
                )
            )
            fig.update_layout(
                title=go.layout.Title(text="Sentiment Sequence, By Sentence"),
                autosize=True,
                yaxis_autorange="reversed",
            )

            st.plotly_chart(fig, use_container_width=True)

            # Stand-alone HTML copy of the chart (plotly.js loaded from the CDN).
            html_bytes = fig.to_html(include_plotlyjs="cdn").encode()
            st.download_button(
                label="Download Interactive Chart",
                data=html_bytes,
                file_name="chart.html",
                mime="text/html",
            )

            # Narrow column for the total score, wide one for the label counts.
            col1, col2 = st.columns([1, 3])

            with col1:
                st.metric(
                    "Overall Sentiment Score",
                    overall_sentiment_score,
                    delta=None,
                    delta_color="normal",
                )

            with col2:
                st.metric(
                    "How Many Positive & Negative Sentences?",
                    sentiment_count,
                    delta=None,
                    delta_color="normal",
                )
|
|
|
|
|
# Horizontal rule separating the results from the usage notes.
st.markdown("\n----------------------------\n")

st.subheader("Note To Users:")

# Each entry is written out as its own element, in the order listed.
usage_notes = (
    "1. The model under the hood is "
    "distilbert-base-uncased-finetuned-sst-2-english. "
    "Clone this app and switch to another transformer model "
    "if you have a different use case.",
    "2. This chart is interactive, and can be downloaded. "
    "Hover over the bars to see each sentence's sentiment score and label",
    "3. You may or may not agree with the sentiment label "
    "generated for each sentence. "
    "Unfortunately there's no way to amend the output within the app.",
)
for note in usage_notes:
    st.write(note)
|
|