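"""Streamlit demo app for sentiment analysis.

Lets the user type in a piece of text and compare three approaches:
the fine-tuned MILESTONE 3 checkpoints, a stock Hugging Face
sentiment-analysis pipeline, and TextBlob's rule-based analyzer.
"""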
import streamlit as st
import torch                     # only used by the commented-out experiments below
import torch.nn.functional as F  # only used by the commented-out experiments below
from textblob import TextBlob
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# model = BertForMaskedLM.from_pretrained("remi/bertabs-finetuned-extractive-abstractive-summarization")

textIn = st.text_input("Input Text Here:", "I really like the color of your car!")

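# Let the user pick which sentiment-analysis approach to run.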
option = st.selectbox('Which pre-trained model would you like for your sentiment analysis?',('MILESTONE 3', 'Pipeline', 'TextBlob'))

st.write('You selected:', option)

if option == 'MILESTONE 3':
    st.write('Loading the MILESTONE 3 models...')

    # Load the six fine-tuned MILESTONE 3 checkpoints from the Hugging Face Hub
    # and wrap each one in a sentiment-analysis pipeline. This assumes the
    # Rathgeberj/milestone3_* repositories contain sequence-classification heads.
    models = []
    tokenizers = []
    classifiers = []
    for i in range(6):
        model_name = f"Rathgeberj/milestone3_{i}"
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        classifier = pipeline(task="sentiment-analysis", model=model, tokenizer=tokenizer)
        models.append(model)
        tokenizers.append(tokenizer)
        classifiers.append(classifier)

    # X_train = [textIn]
    # batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
    st.write('MILESTONE 3 models loaded.')
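
    # Illustrative sketch, not part of the original app: run the input text
    # through each fine-tuned classifier and display its top label and score.
    # This assumes each checkpoint returns standard sentiment-analysis pipeline
    # output of the form [{"label": ..., "score": ...}].
    for i, clf in enumerate(classifiers):
        pred = clf(textIn)[0]
        st.write(f"Model {i}: {pred['label']} (confidence {round(pred['score'], 4)})")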

if option == 'Pipeline':
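    # Off-the-shelf DistilBERT checkpoint fine-tuned on SST-2, wrapped in the
    # standard Hugging Face sentiment-analysis pipeline.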

    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline(task="sentiment-analysis", model=model, tokenizer=tokenizer)
    preds = classifier(textIn)
    preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
    st.write('According to Pipeline, input text is ', preds[0]['label'], ' with a confidence of ', preds[0]['score'])

if option == 'TextBlob':
    # Rule-based sentiment from TextBlob: polarity ranges from -1 (negative)
    # to 1 (positive); subjectivity ranges from 0 (objective) to 1 (subjective).
    blob_sentiment = TextBlob(textIn).sentiment
    polarity = blob_sentiment.polarity
    subjectivity = blob_sentiment.subjectivity
    if polarity < 0:
        sentiment = 'Negative'
    elif polarity == 0:
        sentiment = 'Neutral'
    else:
        sentiment = 'Positive'
    st.write('According to TextBlob, input text is ', sentiment, ' with a subjectivity score (from 0 being objective to 1 being subjective) of ', subjectivity)


#------------------------------------------------------------------------

# tokens = tokenizer.tokenize(textIn)
# token_ids = tokenizer.convert_tokens_to_ids(tokens)
# input_ids = tokenizer(textIn)


# X_train = [textIn]

# batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
# # batch = torch.tensor(batch["input_ids"])

# with torch.no_grad():
#     outputs = model(**batch, labels=torch.tensor([1, 0]))
#     predictions = F.softmax(outputs.logits, dim=1)
#     labels = torch.argmax(predictions, dim=1)
#     labels = [model.config.id2label[label_id] for label_id in labels.tolist()]

# # save_directory = "saved"
# tokenizer.save_pretrained(save_directory)
# model.save_pretrained(save_directory)

# tokenizer = AutoTokenizer.from_pretrained(save_directory)
# model = AutoModelForSequenceClassification.from_pretrained(save_directory)

#------------------------------------------------------------------------