import streamlit as st
import torch                      # used by the commented-out manual-inference code below
import torch.nn.functional as F   # used by the commented-out manual-inference code below
from textblob import TextBlob
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

textIn = st.text_input("Input Text Here:", "I really like the color of your car!")

option = st.selectbox(
    'Which pre-trained model would you like for your sentiment analysis?',
    ('Pipeline', 'TextBlob', 'MILESTONE 3: FINE-TUNED'),
)

# Streamlit reruns this script on every interaction, so no explicit loop is needed.
st.write('You selected:', option)

if option == 'MILESTONE 3: FINE-TUNED':
    # Placeholder: report TextBlob sentiment until the fine-tuned checkpoints below are wired up.
    polarity = TextBlob(textIn).sentiment.polarity
    subjectivity = TextBlob(textIn).sentiment.subjectivity
    if polarity < 0:
        sentiment = 'Negative'
    elif polarity == 0:
        sentiment = 'Neutral'
    else:
        sentiment = 'Positive'
    st.write('According to TextBlob, input text is ', sentiment,
             ' with a subjectivity score (from 0 being objective to 1 being subjective) of ', subjectivity)

    # The six fine-tuned checkpoints (Rathgeberj/milestone3_0 ... Rathgeberj/milestone3_5) will be
    # loaded here once they are ready; the code is kept commented out for now.
    # models, tokenizers, classifiers = [], [], []
    # for i in range(6):
    #     name = f"Rathgeberj/milestone3_{i}"
    #     model_i = AutoModelForSequenceClassification.from_pretrained(name)
    #     tokenizer_i = AutoTokenizer.from_pretrained(name)
    #     models.append(model_i)
    #     tokenizers.append(tokenizer_i)
    #     classifiers.append(pipeline(task="sentiment-analysis", model=model_i, tokenizer=tokenizer_i))

    # X_train = [textIn]
    # batch = tokenizers[0](X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
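
    # A possible next step (a sketch, not in the original code): once the checkpoints above are
    # loaded, the six classifiers could be combined by a majority vote over their predicted labels,
    # reporting the average confidence alongside. `classifiers` comes from the commented-out loop
    # above and `textIn` from the text input at the top; the label strings depend on each
    # checkpoint's config (e.g. 'POSITIVE' vs. 'LABEL_1').
    # results = [clf(textIn)[0] for clf in classifiers]
    # positive_votes = sum(1 for r in results if r["label"] in ("POSITIVE", "LABEL_1"))
    # avg_conf = sum(r["score"] for r in results) / len(results)
    # verdict = 'Positive' if positive_votes > len(results) / 2 else 'Negative'
    # st.write('Fine-tuned ensemble says ', verdict, ' with average confidence ', round(avg_conf, 4))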
    
if option == 'Pipeline':
    # Off-the-shelf DistilBERT checkpoint fine-tuned on SST-2 for binary sentiment.
    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline(task="sentiment-analysis", model=model, tokenizer=tokenizer)
    preds = classifier(textIn)
    preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
    st.write('According to Pipeline, input text is ', preds[0]['label'], ' with a confidence of ', preds[0]['score'])
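
    # A possible refinement (a sketch, not part of the original app): Streamlit reruns the whole
    # script on every interaction, so the checkpoint above is reloaded each time. Wrapping the
    # loader in st.cache_resource (available in recent Streamlit releases) keeps a single copy in
    # memory across reruns; the helper name `load_classifier` is illustrative only.
    # @st.cache_resource
    # def load_classifier(name):
    #     mdl = AutoModelForSequenceClassification.from_pretrained(name)
    #     tok = AutoTokenizer.from_pretrained(name)
    #     return pipeline(task="sentiment-analysis", model=mdl, tokenizer=tok)
    # classifier = load_classifier(model_name)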

if option == 'TextBlob':
    # Lexicon-based sentiment from TextBlob: polarity in [-1, 1], subjectivity in [0, 1].
    polarity = TextBlob(textIn).sentiment.polarity
    subjectivity = TextBlob(textIn).sentiment.subjectivity
    if polarity < 0:
        sentiment = 'Negative'
    elif polarity == 0:
        sentiment = 'Neutral'
    else:
        sentiment = 'Positive'
    st.write('According to TextBlob, input text is ', sentiment,
             ' with a subjectivity score (from 0 being objective to 1 being subjective) of ', subjectivity)


#------------------------------------------------------------------------

# Scratch notes kept from development: manual tokenization, inference, and saving/reloading
# a checkpoint locally. `tokenizer` and `model` refer to the Pipeline branch above.

# tokens = tokenizer.tokenize(textIn)
# token_ids = tokenizer.convert_tokens_to_ids(tokens)
# input_ids = tokenizer(textIn)

# X_train = [textIn]
# batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")

# with torch.no_grad():
#     outputs = model(**batch, labels=torch.tensor([1]))  # one label per input in the batch
#     predictions = F.softmax(outputs.logits, dim=1)
#     labels = torch.argmax(predictions, dim=1)
#     labels = [model.config.id2label[label_id] for label_id in labels.tolist()]

# save_directory = "saved"
# tokenizer.save_pretrained(save_directory)
# model.save_pretrained(save_directory)

# tokenizer = AutoTokenizer.from_pretrained(save_directory)
# model = AutoModelForSequenceClassification.from_pretrained(save_directory)

#------------------------------------------------------------------------
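
# To run the app locally (assuming this file is saved as app.py; the actual filename is not
# shown here):
#   streamlit run app.py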