Jeffrey Rathgeber Jr committed on
Commit e67eb9f · unverified · 1 Parent(s): 54b7a50

Update app.py

Files changed (1)
  1. app.py +120 -50
app.py CHANGED
@@ -5,65 +5,135 @@ from textblob import TextBlob
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
  import torch
  import torch.nn.functional as F
+ from transformers import BertForMaskedLM
+ import pandas as pd
+
+ # model = BertForMaskedLM.from_pretrained("remi/bertabs-finetuned-extractive-abstractive-summarization")

  textIn = st.text_input("Input Text Here:", "I really like the color of your car!")

- # option = st.selectbox('Which pre-trained model would you like for your sentiment analysis?',('Pipeline', 'TextBlob', 'MILESTONE 3'))
- option = st.selectbox('Which pre-trained model would you like for your sentiment analysis?',('MILESTONE 3', 'Pipeline'))
+ option = st.selectbox('Which pre-trained model would you like for your sentiment analysis?',('MILESTONE 3', 'Pipeline', 'TextBlob'))

  st.write('You selected:', option)

  if option == 'MILESTONE 3':
-     polarity = TextBlob(textIn).sentiment.polarity
-     subjectivity = TextBlob(textIn).sentiment.subjectivity
-     sentiment = ''
-     if polarity < 0:
-         sentiment = 'Negative'
-     elif polarity == 0:
-         sentiment = 'Neutral'
-     else:
-         sentiment = 'Positive'
-     st.write('According to TextBlob, input text is ', sentiment, ' and a subjectivity score (from 0 being objective to 1 being subjective) of ', subjectivity)
-
-     # model_name_0 = "Rathgeberj/milestone3_0"
+     model_name_0 = "Rathgeberj/milestone3_0"
      # model_0 = AutoModelForSequenceClassification.from_pretrained(model_name_0)
-     # tokenizer_0 = AutoTokenizer.from_pretrained(model_name_0)
-     # classifier_0 = pipeline(task="sentiment-analysis", model=model_0, tokenizer=tokenizer_0)
+     model_0 = BertForMaskedLM.from_pretrained(model_name_0)
+     tokenizer_0 = AutoTokenizer.from_pretrained(model_name_0)
+     classifier_0 = pipeline(task="sentiment-analysis", model=model_0, tokenizer=tokenizer_0)

-     # model_name_1 = "Rathgeberj/milestone3_1"
+     model_name_1 = "Rathgeberj/milestone3_1"
      # model_1 = AutoModelForSequenceClassification.from_pretrained(model_name_1)
-     # tokenizer_1 = AutoTokenizer.from_pretrained(model_name_1)
-     # classifier_1 = pipeline(task="sentiment-analysis", model=model_1, tokenizer=tokenizer_1)
+     model_1 = BertForMaskedLM.from_pretrained(model_name_1)
+     tokenizer_1 = AutoTokenizer.from_pretrained(model_name_1)
+     classifier_1 = pipeline(task="sentiment-analysis", model=model_1, tokenizer=tokenizer_1)

-     # model_name_2 = "Rathgeberj/milestone3_2"
+     model_name_2 = "Rathgeberj/milestone3_2"
      # model_2 = AutoModelForSequenceClassification.from_pretrained(model_name_2)
-     # tokenizer_2 = AutoTokenizer.from_pretrained(model_name_2)
-     # classifier_2 = pipeline(task="sentiment-analysis", model=model_2, tokenizer=tokenizer_2)
+     model_2 = BertForMaskedLM.from_pretrained(model_name_2)
+     tokenizer_2 = AutoTokenizer.from_pretrained(model_name_2)
+     classifier_2 = pipeline(task="sentiment-analysis", model=model_2, tokenizer=tokenizer_2)

-     # model_name_3 = "Rathgeberj/milestone3_3"
+     model_name_3 = "Rathgeberj/milestone3_3"
      # model_3 = AutoModelForSequenceClassification.from_pretrained(model_name_3)
-     # tokenizer_3 = AutoTokenizer.from_pretrained(model_name_3)
-     # classifier_3 = pipeline(task="sentiment-analysis", model=model_3, tokenizer=tokenizer_3)
+     model_3 = BertForMaskedLM.from_pretrained(model_name_3)
+     tokenizer_3 = AutoTokenizer.from_pretrained(model_name_3)
+     classifier_3 = pipeline(task="sentiment-analysis", model=model_3, tokenizer=tokenizer_3)

-     # model_name_4 = "Rathgeberj/milestone3_4"
+     model_name_4 = "Rathgeberj/milestone3_4"
      # model_4 = AutoModelForSequenceClassification.from_pretrained(model_name_4)
-     # tokenizer_4 = AutoTokenizer.from_pretrained(model_name_4)
-     # classifier_4 = pipeline(task="sentiment-analysis", model=model_4, tokenizer=tokenizer_4)
+     model_4 = BertForMaskedLM.from_pretrained(model_name_4)
+     tokenizer_4 = AutoTokenizer.from_pretrained(model_name_4)
+     classifier_4 = pipeline(task="sentiment-analysis", model=model_4, tokenizer=tokenizer_4)

-     # model_name_5 = "Rathgeberj/milestone3_5"
+     model_name_5 = "Rathgeberj/milestone3_5"
      # model_5 = AutoModelForSequenceClassification.from_pretrained(model_name_5)
-     # tokenizer_5 = AutoTokenizer.from_pretrained(model_name_5)
-     # classifier_5 = pipeline(task="sentiment-analysis", model=model_5, tokenizer=tokenizer_5)
+     model_5 = BertForMaskedLM.from_pretrained(model_name_5)
+     tokenizer_5 = AutoTokenizer.from_pretrained(model_name_5)
+     classifier_5 = pipeline(task="sentiment-analysis", model=model_5, tokenizer=tokenizer_5)

-     # models = [model_0, model_1, model_2, model_3, model_4, model_5]
-     # tokenizers = [tokenizer_0, tokenizer_1, tokenizer_2, tokenizer_3, tokenizer_4, tokenizer_5]
-     # classifiers = [classifier_0, classifier_1, classifier_2, classifier_3, classifier_4, classifier_5]
+     models = [model_0, model_1, model_2, model_3, model_4, model_5]
+     tokenizers = [tokenizer_0, tokenizer_1, tokenizer_2, tokenizer_3, tokenizer_4, tokenizer_5]
+     classifiers = [classifier_0, classifier_1, classifier_2, classifier_3, classifier_4, classifier_5]

-     # X_train = [textIn]
-     # batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
+     st.write('IF YOURE READING THIS: I was unable to complete a fully functioning milestone 3. \
+         If this message print, that means my program successfully loaded my pretrained models. \
+         They are fine tuned iterations of the Bert uncased model, trained on the given training data. \
+         The issue arose when I tried to use my models to analyze the input string, and after much troubleshooting, \
+         I was unable to get it to work. My pre-processing and training algorithm, along with each models .json and config \
+         files will be linked in the github along with the tokenizer I used.')

-
-     # st.write('TESTING2')
+     # X_train = [textIn]
+     # batch = tokenizer_0(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
+
+     # with torch.no_grad():
+     #     outputs = model_0(**batch_0, labels=torch.tensor([1, 0]))
+     #     predictions = F.softmax(outputs.logits, dim=1)
+     #     labels = torch.argmax(predictions, dim=1)
+     #     labels = [model.config.id2label[label_id] for label_id in labels.tolist()]
+
+     # st.write(predictions['label'])
+
+
+     col = ['Tweet', 'Highest_Toxicity_Class_Overall', 'Score_Overall', 'Highest_Toxicity_Class_Except_Toxic', 'Score_Except_Toxic']
+     df = pd.DataFrame(columns=col)
+     pre_populated_tweets = ['Yo bitch Ja Rule is more succesful then youll ever be whats up with you and hating you sad mofuckas...i should bitch slap ur pethedic white faces and get you to kiss my ass you guys sicken me. Ja rule is about pride in da music man. dont diss that shit on him. and nothin is wrong bein like tupac he was a brother too...fuckin white boys get things right next time.',
+         'If you have a look back at the source, the information I updated was the correct form. I can only guess the source hadnt updated. I shall update the information once again but thank you for your message.',
+         'I dont anonymously edit articles at all.',
+         'Thank you for understanding. I think very highly of you and would not revert without discussion.',
+         'Please do not add nonsense to Wikipedia. Such edits are considered vandalism and quickly undone. If you would like to experiment, please use the sandbox instead. Thank you. -',
+         'Dear god this site is horrible.',
+         'I think its crap that the link to roggenbier is to this article. Somebody that knows how to do things should change it.',
+         'Please stop. If you continue to vandalize Wikipedia, as you did to Homosexuality, you will be blocked from editing.',
+         'yeah, thanks for reviving the tradition of pissing all over articles because you want to live out your ethnic essentialism. Why let mere facts get into the way of enjoying that.',
+         'Ive deleted the page , as we have no evidence that you are the person named on that page, and its content goes against Wikipedias policies for the use of user pages.',
+         ]
+     HTCO = [0]*10
+     SO = [0]*10
+     HTCET = [0]*10
+     SET = [0]*10
+
+
+
+     pred_data = []
+
+     # for i in range(10):
+     #     X_train = pre_populated_tweets[i]
+     #     for j in range(6):
+     #         batch = tokenizers[j](X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
+     #         with torch.no_grad():
+     #             outputs = models[j](**batch, labels=torch.tensor([1, 0]))
+     #             predictions = F.softmax(outputs.logits, dim=1)
+     #             labels = torch.argmax(predictions, dim=1)
+     #             labels = [model.config.id2label[label_id] for label_id in labels.tolist()]
+     #             pred_data.append(predictions)
+
+     # st.write(pred_data[0]['label'])
+
+     # for i in range(10):
+     #     HTCO[i]=pred_data[i]['label']
+
+     df = df.assign(Tweet=pre_populated_tweets)
+     df = df.assign(Highest_Toxicity_Class_Overall=HTCO)
+     df = df.assign(Score_Overall=SO)
+     df = df.assign(Highest_Toxicity_Class_Except_Toxic=HTCET)
+     df = df.assign(Score_Except_Toxic=SET)
+
+     # X_train = 'I dont anonymously edit articles at all.'
+     # batch = tokenizers[0](X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
+
+     # with torch.no_grad():
+     #     outputs = models[0](**batch, labels=torch.tensor([1, 0]))
+     #     predictions = F.softmax(outputs.logits, dim=1)
+     #     labels = torch.argmax(predictions, dim=1)
+     #     labels = [model.config.id2label[label_id] for label_id in labels.tolist()]
+     #     pred_data.append(predictions)
+
+
+     st.table(df)
+
+     # st.write(pred_data)

  if option == 'Pipeline':

@@ -75,17 +145,17 @@ if option == 'Pipeline':
      preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
      st.write('According to Pipeline, input text is ', preds[0]['label'], ' with a confidence of ', preds[0]['score'])

- # if option == 'TextBlob':
- #     polarity = TextBlob(textIn).sentiment.polarity
- #     subjectivity = TextBlob(textIn).sentiment.subjectivity
- #     sentiment = ''
- #     if polarity < 0:
- #         sentiment = 'Negative'
- #     elif polarity == 0:
- #         sentiment = 'Neutral'
- #     else:
- #         sentiment = 'Positive'
- #     st.write('According to TextBlob, input text is ', sentiment, ' and a subjectivity score (from 0 being objective to 1 being subjective) of ', subjectivity)
+ if option == 'TextBlob':
+     polarity = TextBlob(textIn).sentiment.polarity
+     subjectivity = TextBlob(textIn).sentiment.subjectivity
+     sentiment = ''
+     if polarity < 0:
+         sentiment = 'Negative'
+     elif polarity == 0:
+         sentiment = 'Neutral'
+     else:
+         sentiment = 'Positive'
+     st.write('According to TextBlob, input text is ', sentiment, ' and a subjectivity score (from 0 being objective to 1 being subjective) of ', subjectivity)


  #------------------------------------------------------------------------

@@ -113,4 +183,4 @@ if option == 'Pipeline':
  # tokenizer = AutoTokenizer.from_pretrained(save_directory)
  # model = AutoModelForSequenceClassification.from_pretrained(save_directory)

- #------------------------------------------------------------------------
+ #------------------------------------------------------------------------
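
Note on the added MILESTONE 3 branch: the commit loads the six Rathgeberj/milestone3_* checkpoints with BertForMaskedLM and hands them to a sentiment-analysis pipeline, but a masked-LM head does not expose per-class classification logits, which is consistent with the in-app st.write message that scoring the input never worked; the commented-out inference block also refers to batch_0 where only batch is defined. The sketch below shows how the intended scoring step could run, under the assumption that a checkpoint is exported with a sequence-classification head and loaded via AutoModelForSequenceClassification. This is an illustrative assumption, not what the commit does; the model name and preprocessing arguments are taken from the code above.

# Hedged sketch, not part of commit e67eb9f: assumes the milestone3_* checkpoints
# carry a sequence-classification head (num_labels and id2label set in their config).
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "Rathgeberj/milestone3_0"  # one of the six checkpoints referenced above
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)  # assumption: classification head present
model.eval()

text = "I really like the color of your car!"
batch = tokenizer([text], padding=True, truncation=True, max_length=512, return_tensors="pt")

with torch.no_grad():
    logits = model(**batch).logits              # shape (1, num_labels); no labels= argument needed at inference
    probs = F.softmax(logits, dim=1)            # per-class probabilities
    label_id = int(torch.argmax(probs, dim=1))  # index of the highest-scoring class
    label = model.config.id2label[label_id]     # human-readable class name from the checkpoint config
    score = round(float(probs[0, label_id]), 4)

print(label, score)

With a classification head in place, the commented per-tweet loop over models[j] and tokenizers[j] could fill HTCO and SO the same way instead of leaving the placeholder zeros in the table.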