chinhon committed on
Commit
8982c1f
·
1 Parent(s): bd5edba

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -0
app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib as mpl
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ import numpy as np
6
+ import re
7
+ import streamlit as st
8
+
9
+ from io import StringIO, BytesIO
10
+ from transformers import pipeline
11
+
12
# Use the full browser width for the page. Older Streamlit releases only
# shipped the ``beta_`` spelling, so fall back to it when the modern name is
# missing. Only AttributeError is caught: a bare ``except`` would also swallow
# KeyboardInterrupt/SystemExit and hide genuine configuration errors.
try:
    st.set_page_config(layout="wide")
except AttributeError:
    st.beta_set_page_config(layout="wide")
16
+
17
# Page heading.
st.title("Sentiment Structure Visualizer")

# Free-text box; the raw string is kept in ``user_input`` and analysed
# further down the script.
user_input = st.text_area("Paste English text here", height=200)

# Markdown horizontal rule between the input area and the results.
st.markdown("\n----------------------------\n")
29
+
30
# Checkpoint used for both the model weights and its tokenizer.
SENTIMENT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"


def _cache_model_loader(loader):
    """Wrap *loader* in Streamlit's resource cache when one is available.

    Streamlit re-executes the whole script on every widget interaction, so
    an uncached ``pipeline(...)`` call would rebuild the model each time.
    The decorator name differs between Streamlit releases; the first one
    found is used, and *loader* is returned unchanged if none exists so the
    app still runs (just without caching).
    """
    for decorator_name in ("cache_resource", "experimental_singleton"):
        decorator = getattr(st, decorator_name, None)
        if decorator is not None:
            return decorator(loader)
    return loader


@_cache_model_loader
def _load_sentiment_pipeline():
    """Build the Hugging Face sentiment-analysis pipeline."""
    return pipeline(
        "sentiment-analysis",
        model=SENTIMENT_MODEL,
        tokenizer=SENTIMENT_MODEL,
    )


# Module-level handle used by the analysis code below.
sentiment = _load_sentiment_pipeline()
35
+
36
# Compiled once at import time: clean_text is called for every sentence.
_PUNCTUATION_RE = re.compile(r"[^\w\s]")
_WHITESPACE_RE = re.compile(r"\s+")


def clean_text(text):
    """Normalise *text* before it is handed to the sentiment model.

    Steps, in order:

    1. Drop every non-ASCII character (accented letters, CJK, emoji, ...).
    2. Lower-case the text.
    3. Remove punctuation and other special characters; letters, digits,
       underscores and whitespace are kept.
    4. Collapse every run of whitespace (spaces, tabs, newlines, carriage
       returns, ...) into a single space and trim both ends.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string; empty if nothing survives the cleaning.
    """
    ascii_only = text.encode("ascii", errors="ignore").decode("ascii")
    without_punctuation = _PUNCTUATION_RE.sub("", ascii_only.lower())
    # A single ``\s+`` pass covers "\n", "\t" and repeated spaces, and also
    # "\r" from Windows line endings, which would otherwise stay embedded.
    return _WHITESPACE_RE.sub(" ", without_punctuation).strip()
50
+
51
# NOTE(review): the indentation of this section was lost in the listing this
# code was recovered from; the nesting below is a reconstruction - confirm
# against the original file.


def _score_sentences(text):
    """Split *text* on "." and score each non-empty fragment.

    Fragments that are empty once cleaned (blank pieces, punctuation-only
    pieces, the empty tail after a final full stop) are dropped *before*
    inference so the model is never asked to classify an empty string.

    Args:
        text: The raw text pasted by the user.

    Returns:
        A DataFrame with the columns ``Paras`` (the raw fragment),
        ``Sentiment_Label`` and ``Sentiment_Score``. The score is the model
        score rounded to 6 decimals and negated for rows labelled
        ``"NEGATIVE"``. The frame is empty when no fragment survives.
    """
    cleaned_pairs = [(raw, clean_text(raw)) for raw in text.split(".")]
    scored = pd.DataFrame(
        [pair for pair in cleaned_pairs if pair[1]],
        columns=["Paras", "Clean_Text"],
    )
    if scored.empty:
        return pd.DataFrame(
            columns=["Paras", "Sentiment_Label", "Sentiment_Score"]
        )

    # ``truncation=True`` asks the tokenizer to cut over-long fragments to
    # the model's maximum length instead of failing on them (e.g. a long
    # paste that contains no full stop at all).
    predictions = sentiment(scored["Clean_Text"].tolist(), truncation=True)

    scored["Sentiment_Label"] = [p["label"] for p in predictions]
    confidence = np.array([p["score"] for p in predictions], dtype=float)
    # Signed score: negative sentences point down, everything else up.
    scored["Sentiment_Score"] = np.where(
        scored["Sentiment_Label"] == "NEGATIVE", -confidence, confidence
    ).round(6)

    return scored[["Paras", "Sentiment_Label", "Sentiment_Score"]]


def _build_heatmap(scores):
    """Return the sentence-by-sentence sentiment heatmap for *scores*."""
    fig = go.Figure(
        data=go.Heatmap(
            z=scores["Sentiment_Score"],
            x=scores.index,
            y=scores["Sentiment_Label"],
            colorscale=px.colors.sequential.RdBu,
        )
    )
    fig.update_layout(
        title=go.layout.Title(text="Sentiment Sequence, By Sentence"),
        autosize=True,
        yaxis_autorange="reversed",
    )
    return fig


# Only analyse when the user has typed something other than whitespace.
if user_input.strip():
    with st.spinner("..."):
        df = _score_sentences(user_input)

        if df.empty:
            # e.g. the input was only punctuation or non-ASCII characters.
            st.warning("No sentences found to analyse. Please paste some English text.")
        else:
            overall_sentiment_score = round(float(df["Sentiment_Score"].sum()), 3)
            sentiment_count = df["Sentiment_Label"].value_counts().to_string()

            fig = _build_heatmap(df)
            st.plotly_chart(fig, use_container_width=True)

            # Offer the same chart as a standalone HTML file; plotly.js is
            # referenced from the CDN rather than embedded in the download.
            buffer = StringIO()
            fig.write_html(buffer, include_plotlyjs="cdn")
            html_bytes = buffer.getvalue().encode("utf-8")

            st.download_button(
                label="Download Interactive Chart",
                data=html_bytes,
                file_name="chart.html",
                mime="text/html",
            )

            col1, col2 = st.columns([1, 3])

            with col1:
                st.metric(
                    "Overall Sentiment Score",
                    overall_sentiment_score,
                    delta=None,
                    delta_color="normal",
                )

            with col2:
                st.metric(
                    "How Many Positive & Negative Sentences?",
                    sentiment_count,
                    delta=None,
                    delta_color="normal",
                )
140
+
141
+
142
# Markdown horizontal rule above the usage notes.
st.markdown("\n----------------------------\n")

st.subheader("Note To Users:")

# Each note is written as its own element, in order.
_usage_notes = (
    "1. The model under the hood is distilbert-base-uncased-finetuned-sst-2-english. Clone this app and switch to another transformer model if you have a different use case.",
    "2. This chart is interactive, and can be downloaded. Hover over the bars to see each sentence's sentiment score and label",
    "3. You may or may not agree with the sentiment label generated for each sentence. Unfortunately there's no way to amend the output within the app.",
)
for _note in _usage_notes:
    st.write(_note)