Puyush committed on
Commit 437b589 · 1 Parent(s): 365c2b2

Upload app.py

Files changed (1)
  1. app.py +157 -0
app.py ADDED
@@ -0,0 +1,157 @@
import re
import nltk
import keras
import spacy
import string
import pickle
import tempfile
import numpy as np
import gradio as gr
import contractions
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords, wordnet
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.sequence import pad_sequences
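
# NLTK data used by the preprocessing below (word tokenizer, stop-word list,
# WordNet lemmatizer, POS tagger). Downloading at start-up is an assumption about
# the deployment environment; these calls are no-ops when the corpora are present.
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)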


class Attention(Layer):

    def __init__(self, return_sequences=True, **kwargs):
        self.return_sequences = return_sequences
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # One attention score per timestep: W projects each feature vector to a scalar,
        # b adds a per-timestep bias (input_shape = (batch, timesteps, features))
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")
        super(Attention, self).build(input_shape)

    def call(self, x):
        # Softmax over the time axis gives the attention weights, which re-weight the inputs
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a

        if self.return_sequences:
            return output
        return K.sum(output, axis=1)
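
    # Hypothetical helper (an assumption, not required here since the model is loaded
    # with custom_objects): serializing return_sequences lets the layer round-trip
    # cleanly through model.save()/load_model.
    def get_config(self):
        config = super(Attention, self).get_config()
        config.update({"return_sequences": self.return_sequences})
        return config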


def load_tokenizer(path):
    with open(path, 'rb') as f:
        tokenizer = pickle.load(f)
    return tokenizer


def cleaning(text):
    # Punctuation symbols to remove
    exclude = string.punctuation

    def expand_contractions(text): return contractions.fix(text)
    text = expand_contractions(text)

    text = text.lower()

    def remove_tags(text): return re.sub(r'@\w*', ' ', text)
    text = remove_tags(text)

    def remove_hashtags(text): return re.sub(r'#\w*', ' ', text)
    text = remove_hashtags(text)

    def remove_apostrophe(text): return re.sub(r"'s\b", "", text)
    text = remove_apostrophe(text)

    def remove_special_chars(text): return re.sub(r"[^a-zA-Z0-9\s]", ' ', text)
    text = remove_special_chars(text)

    def remove_number(text): return re.sub(r'[\d]', ' ', text)
    text = remove_number(text)

    def remove_punc(text): return ''.join([c for c in text if c not in exclude])
    text = remove_punc(text)

    # Collapse the runs of whitespace left behind by the substitutions above
    def remove_extra_spaces(text): return re.sub(r'\s+', ' ', text).strip()
    text = remove_extra_spaces(text)

    def map_pos_tags(pos_tags):
        # Map NLTK POS tags to WordNet tags
        tag_map = {
            'N': wordnet.NOUN,
            'V': wordnet.VERB,
            'R': wordnet.ADV,
            'J': wordnet.ADJ
        }

        mapped_tags = []
        for token, tag in pos_tags:
            mapped_tag = tag[0].upper()
            if mapped_tag in tag_map:
                mapped_tag = tag_map[mapped_tag]
            else:
                mapped_tag = wordnet.NOUN  # Default to noun if no mapping found
            mapped_tags.append(mapped_tag)

        return mapped_tags

    def remove_stopwords(text):
        stop_words = set(stopwords.words('english'))
        tokens = word_tokenize(text)
        filtered_text = [word for word in tokens if word.lower() not in stop_words]
        return ' '.join(filtered_text)
    text = remove_stopwords(text)

    def pos_tag_and_lemmatize(text):
        tokens = word_tokenize(text)
        pos_tags = nltk.pos_tag(tokens)

        # Map POS tags to WordNet tags
        wordnet_tags = map_pos_tags(pos_tags)

        # Lemmatize based on POS tags
        lemmatizer = WordNetLemmatizer()
        lemmas = " ".join([lemmatizer.lemmatize(token, tag) for token, tag in zip(tokens, wordnet_tags)])

        return lemmas
    text = pos_tag_and_lemmatize(text)

    return text


def label_tweet(test_review):
    # Convert the cleaned text to a padded index sequence of the fixed length the model expects
    token_list = tokenizer.texts_to_sequences([test_review])[0]
    token_list = pad_sequences([token_list], maxlen=44, padding='post')
    predicted = model.predict(token_list, verbose=0)
    # Single sigmoid output: 1 = positive, 0 = negative
    if predicted[0][0] >= 0.5:
        return 1
    else:
        return 0


def analyze_text(comment):
    comment = cleaning(comment)
    result = label_tweet(comment)
    if result == 0:
        text = "Negative"
    else:
        text = "Positive"
    return text


# Load the trained model; the custom Attention layer must be registered via custom_objects
model = keras.models.load_model("twitter_sentiment.keras",
                                custom_objects={'Attention': Attention})

# Load tokenizer
tokenizer = load_tokenizer('tokenizer.pkl')

interface = gr.Interface(fn=analyze_text,
                         inputs=gr.Textbox(lines=2, placeholder='Enter a positive or negative tweet here...'),
                         outputs='text',
                         title='Twitter Sentiment Analysis',
                         theme='darkhuggingface')
interface.launch(inline=False)
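
# Quick local check (a sketch, assuming twitter_sentiment.keras and tokenizer.pkl sit
# next to app.py and that gradio, tensorflow, nltk and contractions are installed):
#
#   $ python app.py        # loads the model and tokenizer, then serves the Gradio UI
#
# analyze_text() can also be called directly in a REPL; it returns the string
# "Positive" or "Negative" for the given tweet, depending on the trained model.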