Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import matplotlib as mpl
|
2 |
+
import pandas as pd
|
3 |
+
import plotly.express as px
|
4 |
+
import plotly.graph_objects as go
|
5 |
+
import numpy as np
|
6 |
+
import re
|
7 |
+
import streamlit as st
|
8 |
+
|
9 |
+
from io import StringIO, BytesIO
|
10 |
+
from transformers import pipeline
|
11 |
+
|
12 |
+
# Use the wide page layout. The fallback targets Streamlit releases that
# presumably only expose the older ``beta_`` spelling of this call.
try:
    st.set_page_config(layout="wide")
except AttributeError:
    # Only a missing attribute should trigger the legacy call; any other
    # failure from set_page_config is a real error and must surface instead
    # of being masked by a second, unrelated exception.
    st.beta_set_page_config(layout="wide")
|
16 |
+
|
17 |
+
# Page header followed by the free-text box the analysis reads from.
st.title("Sentiment Structure Visualizer")

# ``user_input`` holds whatever is currently in the text area.
user_input = st.text_area("Paste English text here", height=200)

# Horizontal rule separating the input area from the results.
st.markdown(
    """
----------------------------
"""
)
|
29 |
+
|
30 |
+
# Model and tokenizer share one checkpoint name, so spell it once.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# pipeline is presumably rebuilt every run; consider wrapping it in a
# Streamlit caching decorator supported by the deployed version.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"

# Callable that takes a list of strings and returns one result per string;
# the results are read with .get("label") / .get("score") further down.
sentiment = pipeline("sentiment-analysis", model=MODEL_NAME, tokenizer=MODEL_NAME)
|
35 |
+
|
36 |
+
def clean_text(text):
    """Normalise a sentence before it is passed to the sentiment model.

    Steps, in order:
      1. Drop every non-ASCII character (accents, CJK characters, emoji...).
      2. Lower-case the text.
      3. Remove punctuation and other special characters; letters, digits,
         underscores and whitespace are kept.
      4. Collapse every run of whitespace (spaces, tabs, newlines, carriage
         returns, form feeds...) into a single space and trim both ends.

    Args:
        text: The raw sentence as a ``str``.

    Returns:
        The cleaned string; empty if nothing but punctuation, whitespace or
        non-ASCII characters was supplied.
    """
    ascii_only = text.encode("ascii", errors="ignore").decode("ascii")
    without_punctuation = re.sub(r"[^\w\s]", "", ascii_only.lower())
    # str.split() with no argument splits on any whitespace run, so "\r\n",
    # tabs and repeated spaces are all normalised in one pass.
    return " ".join(without_punctuation.split())
|
50 |
+
|
51 |
+
# Main analysis: split the pasted text into sentences, score each one with
# the sentiment pipeline, then render a heatmap, a download button for the
# chart and two summary metrics. Nothing runs while the text box is empty.
if user_input != "":
    with st.spinner("..."):
        # Sentences are delimited by full stops. Fragments holding only
        # whitespace (e.g. after a trailing "." or between "..") are dropped
        # up front so they are never sent to the model and the remaining
        # sentences keep contiguous positions on the chart's x axis.
        sentences = [part for part in user_input.split(".") if part.strip()]

        if not sentences:
            # Input such as "..." or blank lines contains nothing to score.
            st.warning("No sentences found. Please enter some text.")
        else:
            # The model sees the cleaned text; the table keeps the raw text.
            results = sentiment([clean_text(part) for part in sentences])

            df = pd.DataFrame(
                {
                    "Paras": sentences,
                    "Sentiment_Label": [item.get("label") for item in results],
                    "Sentiment_Score": [item.get("score") for item in results],
                }
            )

            # Fold the label into the sign so NEGATIVE sentences score below
            # zero and every other label keeps its score as-is.
            df["Sentiment_Score"] = np.where(
                df["Sentiment_Label"] == "NEGATIVE",
                -(df["Sentiment_Score"]),
                df["Sentiment_Score"],
            )
            df["Sentiment_Score"] = df["Sentiment_Score"].round(6)

            # Summary figures shown underneath the chart.
            overall_sentiment_score = df["Sentiment_Score"].sum().round(3)
            sentiment_count = df["Sentiment_Label"].value_counts().to_string()

            # One cell per sentence: x is the sentence position, y its label
            # and the colour its signed score.
            fig = go.Figure(
                data=go.Heatmap(
                    z=df["Sentiment_Score"],
                    x=df.index,
                    y=df["Sentiment_Label"],
                    colorscale=px.colors.sequential.RdBu,
                )
            )
            fig.update_layout(
                title=go.layout.Title(text="Sentiment Sequence, By Sentence"),
                autosize=True,
            )
            fig.update_layout(yaxis_autorange="reversed")

            st.plotly_chart(fig, use_container_width=True)

            # Serialise the figure to standalone HTML (plotly.js loaded from
            # the CDN) so it can be offered as a download.
            buffer = StringIO()
            fig.write_html(buffer, include_plotlyjs="cdn")
            html_bytes = buffer.getvalue().encode()

            st.download_button(
                label="Download Interactive Chart",
                data=html_bytes,
                file_name="chart.html",
                mime="text/html",
            )

            # Narrow column for the total score, wide column for the counts.
            col1, col2 = st.columns([1, 3])

            with col1:
                st.metric(
                    "Overall Sentiment Score",
                    overall_sentiment_score,
                    delta=None,
                    delta_color="normal",
                )

            with col2:
                st.metric(
                    "How Many Positive & Negative Sentences?",
                    sentiment_count,
                    delta=None,
                    delta_color="normal",
                )
|
140 |
+
|
141 |
+
|
142 |
+
# Footer: a horizontal rule followed by static usage notes for the reader.
st.markdown(
    """
----------------------------
"""
)

st.subheader("Note To Users:")

# Each note is written as its own element, in order.
for note in (
    "1. The model under the hood is distilbert-base-uncased-finetuned-sst-2-english. Clone this app and switch to another transformer model if you have a different use case.",
    "2. This chart is interactive, and can be downloaded. Hover over the bars to see each sentence's sentiment score and label",
    "3. You may or may not agree with the sentiment label generated for each sentence. Unfortunately there's no way to amend the output within the app.",
):
    st.write(note)
|