Puyush committed on
Commit 365c2b2 · 1 Parent(s): 5de65b7

Delete app.py

Files changed (1)
  1. app.py +0 -158
app.py DELETED
@@ -1,158 +0,0 @@
import re
import nltk
import keras
import spacy
import string
import pickle
import tempfile
import numpy as np
import gradio as gr
import contractions
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords, wordnet
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.sequence import pad_sequences


class Attention(Layer):
    """Simple additive attention layer applied over the time dimension."""

    def __init__(self, return_sequences=True, **kwargs):
        self.return_sequences = return_sequences
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # One weight per feature and one bias per timestep
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")
        super(Attention, self).build(input_shape)

    def call(self, x):
        # Score each timestep, normalize with softmax, and weight the inputs
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a

        if self.return_sequences:
            return output
        return K.sum(output, axis=1)


def load_tokenizer(path):
    with open(path, 'rb') as f:
        tokenizer = pickle.load(f)
    return tokenizer


def cleaning(text):
    """Normalize a raw tweet: expand contractions, strip noise, remove stop words, and lemmatize."""
    nlp = spacy.load('en_core_web_sm')
    # Punctuation symbols to remove
    exclude = string.punctuation

    def expand_contractions(text): return contractions.fix(text)
    text = expand_contractions(text)

    text = text.lower()

    def remove_tags(text): return re.sub(r'@\w*', ' ', text)
    text = remove_tags(text)

    def remove_hashtags(text): return re.sub(r'#\w*', ' ', text)
    text = remove_hashtags(text)

    def remove_apostrophe(text): return re.sub(r"'s\b", "", text)
    text = remove_apostrophe(text)

    def remove_special_chars(text): return re.sub(r"[^a-zA-Z0-9\s]", ' ', text)
    text = remove_special_chars(text)

    def remove_number(text): return re.sub(r'[\d]', ' ', text)
    text = remove_number(text)

    def remove_punc(text): return ''.join([c for c in text if c not in exclude])
    text = remove_punc(text)

    def remove_extra_spaces(text): return re.sub(r'\s+', ' ', text).strip()
    text = remove_extra_spaces(text)

    def map_pos_tags(pos_tags):
        # Map NLTK POS tags to WordNet tags
        tag_map = {
            'N': wordnet.NOUN,
            'V': wordnet.VERB,
            'R': wordnet.ADV,
            'J': wordnet.ADJ
        }

        mapped_tags = []
        for token, tag in pos_tags:
            mapped_tag = tag[0].upper()
            if mapped_tag in tag_map:
                mapped_tag = tag_map[mapped_tag]
            else:
                mapped_tag = wordnet.NOUN  # Default to noun if no mapping found
            mapped_tags.append(mapped_tag)

        return mapped_tags

    def remove_stopwords(text):
        stop_words = set(stopwords.words('english'))
        tokens = word_tokenize(text)
        filtered_text = [word for word in tokens if word.lower() not in stop_words]
        return ' '.join(filtered_text)
    text = remove_stopwords(text)

    def pos_tag_and_lemmatize(text):
        tokens = word_tokenize(text)
        pos_tags = nltk.pos_tag(tokens)

        # Map POS tags to WordNet tags
        wordnet_tags = map_pos_tags(pos_tags)

        # Lemmatize based on POS tags
        lemmatizer = WordNetLemmatizer()
        lemmas = " ".join([lemmatizer.lemmatize(token, tag) for token, tag in zip(tokens, wordnet_tags)])

        return lemmas
    text = pos_tag_and_lemmatize(text)

    return text


def label_tweet(test_review):
    # Convert the cleaned text to a padded token sequence and predict its sentiment
    token_list = tokenizer.texts_to_sequences([test_review])[0]
    token_list = pad_sequences([token_list], maxlen=44, padding='post')
    predicted = model.predict(token_list, verbose=0)
    if predicted[0][0] >= 0.5:
        return 1
    else:
        return 0


def analyze_text(comment):
    comment = cleaning(comment)
    result = label_tweet(comment)
    if result == 0:
        text = "Negative"
    else:
        text = "Positive"
    return text


# Reload the trained model identically, including the custom Attention layer.
model = keras.models.load_model("twitter_sentiment.keras",
                                custom_objects={'Attention': Attention})

# Load tokenizer
tokenizer = load_tokenizer('tokenizer.pkl')

interface = gr.Interface(fn=analyze_text,
                         inputs=gr.inputs.Textbox(lines=2, placeholder='Enter a positive or negative tweet here...'),
                         outputs='text', title='Twitter Sentiment Analysis', theme='darkhuggingface')
interface.launch(inline=False)