amirulhazym committed on
Commit
f5290c6
·
verified ·
1 Parent(s): 22bde60

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +179 -0
  2. requirements.txt +141 -0
app.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ import numpy as np
4
+ import torch
5
+
6
+ # Set page configuration
7
st.set_page_config(
    page_title="Sentiment Analysis API",
    page_icon="😀",
    layout="centered",
    initial_sidebar_state="collapsed",
)

# App title and description.
st.title("Sentiment Analysis API")
# The checkpoint loaded by load_model() is
# cardiffnlp/twitter-roberta-base-sentiment, a RoBERTa model, so the
# description names RoBERTa (the previous text said "BERT").
st.write(
    "This API uses a pre-trained RoBERTa model to classify text sentiment "
    "as positive, negative, or neutral."
)
17
+
18
+ # Load the sentiment analysis model
19
@st.cache_resource
def load_model():
    """Build the Hugging Face sentiment-analysis pipeline used by the app.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``cardiffnlp/twitter-roberta-base-sentiment`` checkpoint. The
        function is wrapped in ``st.cache_resource`` so the pipeline is
        reused rather than rebuilt each time it is requested.
    """
    sentiment_pipeline = pipeline(
        'sentiment-analysis',
        model="cardiffnlp/twitter-roberta-base-sentiment",
    )
    return sentiment_pipeline


# Module-level pipeline handle used by the rest of the script.
model = load_model()
25
+
26
+ # Sample text examples
27
examples = [
    "I absolutely love this new feature! It's amazing.",
    "This product is terrible and doesn't work as advertised.",
    "The weather is just okay today, nothing special."
]

# Session-state key backing the text area, so button callbacks can fill it.
TEXT_INPUT_KEY = "text_input"


def _use_example(example_text):
    """Button callback: copy ``example_text`` into the text area's state."""
    st.session_state[TEXT_INPUT_KEY] = example_text


# Create the UI elements.
text_input = st.text_area(
    "Enter text to analyze:",
    height=150,
    placeholder="Type or paste your text here...",
    key=TEXT_INPUT_KEY,
)

# Add example buttons.
# Previously a click only assigned the example to the local ``text_input``.
# That value lived for a single rerun: the next click on "Analyze Sentiment"
# reran the script, the example button returned False, and ``text_input`` fell
# back to the (still empty) text area. Writing the example into the widget's
# session state from an on_click callback makes it persist across reruns.
st.write("Or try one of these examples:")
col1, col2, col3 = st.columns(3)
example_buttons = zip(
    (col1, col2, col3),
    ("Positive Example", "Negative Example", "Neutral Example"),
    examples,
)
for column, button_label, example_text in example_buttons:
    with column:
        st.button(button_label, on_click=_use_example, args=(example_text,))
49
+
50
+ # Function to analyze and display sentiment
51
# Raw pipeline labels mapped to user-friendly sentiment names.
SENTIMENT_LABELS = {
    'LABEL_0': 'Negative',
    'LABEL_1': 'Neutral',
    'LABEL_2': 'Positive',
}

# Upper bound on whitespace-separated words accepted for analysis.
MAX_INPUT_WORDS = 512


def analyze_sentiment(text):
    """Classify ``text`` with the module-level ``model`` and render the result.

    This is the single definition of the function. The script previously
    defined it twice, and the second copy (the one with the length guard)
    was only bound after the sole call below, so the guard never ran.

    Args:
        text: The text to classify.

    Returns:
        None. All output is written to the Streamlit page: a colour-coded
        status box, a progress bar for the confidence, and a bar chart.
        Over-long input and any exception are reported with ``st.error``.
    """
    try:
        # The guard counts whitespace-separated words, not model tokens, so
        # the model may still reject some accepted inputs; the handler below
        # reports that case.
        if len(text.split()) > MAX_INPUT_WORDS:
            st.error(
                f"Input too long (max {MAX_INPUT_WORDS} words). "
                "Please shorten the text."
            )
            return

        result = model(text)[0]
        sentiment = SENTIMENT_LABELS[result['label']]
        confidence = result['score']

        # Colour-coded box: success for Positive, error for Negative,
        # info for anything else (Neutral).
        show_status = {
            "Positive": st.success,
            "Negative": st.error,
        }.get(sentiment, st.info)
        show_status(f"Sentiment: {sentiment} (Confidence: {confidence:.4f})")

        # Display confidence as a progress bar.
        st.progress(confidence)

        # Only the predicted class gets a non-zero score; the pipeline call
        # above returns the top label only.
        st.subheader("Sentiment Breakdown")
        sentiment_names = list(SENTIMENT_LABELS.values())
        sentiment_data = {
            'Sentiment': sentiment_names,
            'Score': [
                confidence if name == sentiment else 0
                for name in sentiment_names
            ],
        }
        st.bar_chart(sentiment_data, x='Sentiment', y='Score')
    except Exception as e:
        # Top-level UI boundary: show the failure on the page.
        st.error(
            f"Error analyzing sentiment: {e}. "
            "Please try again or use shorter text."
        )


# Process the text when the analyze button is clicked.
if st.button("Analyze Sentiment") and text_input:
    with st.spinner("Analyzing sentiment..."):
        analyze_sentiment(text_input)
elif text_input:
    st.info("Click 'Analyze Sentiment' to process the text.")
else:
    st.info("Please enter some text to analyze.")
140
+
141
+ # Add information about the model
142
with st.expander("About the Model"):
    # Markdown shown inside the expander: the checkpoint name, its training
    # background, the three raw labels, and a link to the model card.
    about_markdown = """
    This application uses the `cardiffnlp/twitter-roberta-base-sentiment` model from Hugging Face.

    The model is a RoBERTa-base model trained on ~58M tweets and fine-tuned for sentiment analysis
    with the TweetEval benchmark. It classifies text into three sentiment categories:

    - Negative (LABEL_0)
    - Neutral (LABEL_1)
    - Positive (LABEL_2)

    Source: [cardiffnlp/twitter-roberta-base-sentiment](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment)
    """
    st.write(about_markdown)
155
+
156
with st.expander("Model Performance"):
    st.write("Tested on 100 samples from `tweet_eval` dataset.")
    if st.button("Show Test Accuracy"):
        # Imported here so `datasets` is only loaded when the button is used.
        from datasets import load_dataset

        dataset = load_dataset("tweet_eval", "sentiment", split="test[:100]")

        # Pipeline label -> integer class id compared against the dataset's
        # `label` column. Unknown labels fall back to 1, matching the
        # original conditional expression.
        label_to_id = {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}

        # One batched call: the previous generator ran the model up to twice
        # per sample (once per label comparison).
        texts = list(dataset['text'])
        predictions = model(texts)
        correct = sum(
            label_to_id.get(prediction['label'], 1) == label
            for prediction, label in zip(predictions, dataset['label'])
        )
        # Divide by the number of samples scored, not a hard-coded 100.
        st.write(f"Accuracy: {correct / len(texts):.2f}")
163
+
164
+ # Footer
165
# Rendered in order: a horizontal rule, then the project credit line.
for footer_markdown in (
    "---",
    "Created as part of Mini Project 1: Sentiment Analysis API",
):
    st.markdown(footer_markdown)
167
+
168
# BibTeX entry for the TweetEval paper behind the model used above.
# It was pasted into the script as bare text, which is a Python SyntaxError
# and stops the whole file from compiling. It is kept here as a string
# constant instead. Authors are joined with BibTeX's " and " separator.
TWEETEVAL_CITATION = """\
@inproceedings{barbieri-etal-2020-tweeteval,
    title = "{T}weet{E}val: Unified Benchmark and Comparative Evaluation for Tweet Classification",
    author = "Barbieri, Francesco and Camacho-Collados, Jose and Espinosa Anke, Luis and Neves, Leonardo",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
    month = nov,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.findings-emnlp.148",
    doi = "10.18653/v1/2020.findings-emnlp.148",
    pages = "1644--1650"
}
"""
requirements.txt ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohappyeyeballs==2.6.1
2
+ aiohttp==3.11.16
3
+ aiosignal==1.3.2
4
+ altair==5.5.0
5
+ anyio==4.9.0
6
+ argon2-cffi==23.1.0
7
+ argon2-cffi-bindings==21.2.0
8
+ arrow==1.3.0
9
+ asttokens==3.0.0
10
+ async-lru==2.0.5
11
+ attrs==25.3.0
12
+ babel==2.17.0
13
+ beautifulsoup4==4.13.4
14
+ bleach==6.2.0
15
+ blinker==1.9.0
16
+ cachetools==5.5.2
17
+ certifi==2025.1.31
18
+ cffi==1.17.1
19
+ charset-normalizer==3.4.1
20
+ click==8.1.8
21
+ colorama==0.4.6
22
+ comm==0.2.2
23
+ datasets==3.5.0
24
+ debugpy==1.8.14
25
+ decorator==5.2.1
26
+ defusedxml==0.7.1
27
+ dill==0.3.8
28
+ executing==2.2.0
29
+ fastjsonschema==2.21.1
30
+ filelock==3.18.0
31
+ fqdn==1.5.1
32
+ frozenlist==1.6.0
33
+ fsspec==2024.12.0
34
+ gitdb==4.0.12
35
+ GitPython==3.1.44
36
+ h11==0.14.0
37
+ httpcore==1.0.8
38
+ httpx==0.28.1
39
+ huggingface-hub==0.30.2
40
+ idna==3.10
41
+ ipykernel==6.29.5
42
+ ipython==9.1.0
43
+ ipython_pygments_lexers==1.1.1
44
+ ipywidgets==8.1.6
45
+ isoduration==20.11.0
46
+ jedi==0.19.2
47
+ Jinja2==3.1.6
48
+ json5==0.12.0
49
+ jsonpointer==3.0.0
50
+ jsonschema==4.23.0
51
+ jsonschema-specifications==2024.10.1
52
+ jupyter==1.1.1
53
+ jupyter-console==6.6.3
54
+ jupyter-events==0.12.0
55
+ jupyter-lsp==2.2.5
56
+ jupyter_client==8.6.3
57
+ jupyter_core==5.7.2
58
+ jupyter_server==2.15.0
59
+ jupyter_server_terminals==0.5.3
60
+ jupyterlab==4.4.0
61
+ jupyterlab_pygments==0.3.0
62
+ jupyterlab_server==2.27.3
63
+ jupyterlab_widgets==3.0.14
64
+ MarkupSafe==3.0.2
65
+ matplotlib-inline==0.1.7
66
+ mistune==3.1.3
67
+ mpmath==1.3.0
68
+ multidict==6.4.3
69
+ multiprocess==0.70.16
70
+ narwhals==1.35.0
71
+ nbclient==0.10.2
72
+ nbconvert==7.16.6
73
+ nbformat==5.10.4
74
+ nest-asyncio==1.6.0
75
+ networkx==3.4.2
76
+ notebook==7.4.0
77
+ notebook_shim==0.2.4
78
+ numpy==2.2.4
79
+ overrides==7.7.0
80
+ packaging==24.2
81
+ pandas==2.2.3
82
+ pandocfilters==1.5.1
83
+ parso==0.8.4
84
+ pillow==11.2.1
85
+ platformdirs==4.3.7
86
+ prometheus_client==0.21.1
87
+ prompt_toolkit==3.0.51
88
+ propcache==0.3.1
89
+ protobuf==5.29.4
90
+ psutil==7.0.0
91
+ pure_eval==0.2.3
92
+ pyarrow==19.0.1
93
+ pycparser==2.22
94
+ pydeck==0.9.1
95
+ Pygments==2.19.1
96
+ python-dateutil==2.9.0.post0
97
+ python-json-logger==3.3.0
98
+ pytz==2025.2
99
+ pywin32==310; sys_platform == "win32"
100
+ pywinpty==2.0.15; sys_platform == "win32"
101
+ PyYAML==6.0.2
102
+ pyzmq==26.4.0
103
+ referencing==0.36.2
104
+ regex==2024.11.6
105
+ requests==2.32.3
106
+ rfc3339-validator==0.1.4
107
+ rfc3986-validator==0.1.1
108
+ rpds-py==0.24.0
109
+ safetensors==0.5.3
110
+ Send2Trash==1.8.3
111
+ setuptools==78.1.0
112
+ six==1.17.0
113
+ smmap==5.0.2
114
+ sniffio==1.3.1
115
+ soupsieve==2.6
116
+ stack-data==0.6.3
117
+ streamlit==1.44.1
118
+ sympy==1.13.1
119
+ tenacity==9.1.2
120
+ terminado==0.18.1
121
+ tinycss2==1.4.0
122
+ tokenizers==0.21.1
123
+ toml==0.10.2
124
+ torch==2.6.0
125
+ tornado==6.4.2
126
+ tqdm==4.67.1
127
+ traitlets==5.14.3
128
+ transformers==4.51.3
129
+ types-python-dateutil==2.9.0.20241206
130
+ typing_extensions==4.13.2
131
+ tzdata==2025.2
132
+ uri-template==1.3.0
133
+ urllib3==2.4.0
134
+ watchdog==6.0.0
135
+ wcwidth==0.2.13
136
+ webcolors==24.11.1
137
+ webencodings==0.5.1
138
+ websocket-client==1.8.0
139
+ widgetsnbextension==4.0.14
140
+ xxhash==3.5.0
141
+ yarl==1.20.0