import streamlit as st
import tensorflow as tf
from transformers import pipeline
from textblob import TextBlob
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
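# Streamlit demo that runs sentiment analysis on user-supplied text with a choice of
# back-ends: a Hugging Face `pipeline`, TextBlob, or the fine-tuned milestone3 models
# (the latter still being wired up in the commented-out sections below).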
textIn = st.text_input("Input Text Here:", "I really like the color of your car!")
option = st.selectbox('Which pre-trained model would you like for your sentiment analysis?',('Pipeline', 'TextBlob', 'MILESTONE 3: FINE-TUNED'))
st.write('You selected:', option)

if option == 'MILESTONE 3: FINE-TUNED':
    # NOTE: until the fine-tuned milestone3 models (commented out below) are hooked up,
    # this branch falls back to TextBlob's rule-based polarity/subjectivity scores.
    polarity = TextBlob(textIn).sentiment.polarity
    subjectivity = TextBlob(textIn).sentiment.subjectivity
    if polarity < 0:
        sentiment = 'Negative'
    elif polarity == 0:
        sentiment = 'Neutral'
    else:
        sentiment = 'Positive'
    st.write('According to TextBlob, input text is ', sentiment,
             ', with a subjectivity score (from 0 being objective to 1 being subjective) of ', subjectivity)
    # Loading of the six fine-tuned milestone3 checkpoints, kept commented out for now.
    # model_names = [f"Rathgeberj/milestone3_{i}" for i in range(6)]
    # models = [AutoModelForSequenceClassification.from_pretrained(name) for name in model_names]
    # tokenizers = [AutoTokenizer.from_pretrained(name) for name in model_names]
    # classifiers = [pipeline(task="sentiment-analysis", model=m, tokenizer=t)
    #                for m, t in zip(models, tokenizers)]
    # X_train = [textIn]
    # batch = tokenizers[0](X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
    # st.write('TESTING2')
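    # A minimal sketch (assuming the classifiers above have been loaded) of how each
    # fine-tuned model's prediction could be reported; `classifiers` comes from the
    # commented block above.
    # for i, clf in enumerate(classifiers):
    #     pred = clf(textIn)[0]
    #     st.write('milestone3_', i, ': ', pred['label'], ' (', pred['score'], ')')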

if option == 'Pipeline':
    # Off-the-shelf DistilBERT model fine-tuned on SST-2, run through the high-level
    # Hugging Face pipeline API.
    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline(task="sentiment-analysis", model=model, tokenizer=tokenizer)
    preds = classifier(textIn)
    preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
    st.write('According to Pipeline, input text is ', preds[0]['label'], ' with a confidence of ', preds[0]['score'])
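# A possible refinement (an assumption, not part of the original app): wrap model loading
# in a cached helper so Streamlit does not reload the weights on every rerun, e.g.
#     @st.cache_resource
#     def load_classifier(name):
#         return pipeline(task="sentiment-analysis", model=name, tokenizer=name)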
# if option == 'TextBlob':
#     polarity = TextBlob(textIn).sentiment.polarity
#     subjectivity = TextBlob(textIn).sentiment.subjectivity
#     if polarity < 0:
#         sentiment = 'Negative'
#     elif polarity == 0:
#         sentiment = 'Neutral'
#     else:
#         sentiment = 'Positive'
#     st.write('According to TextBlob, input text is ', sentiment,
#              ', with a subjectivity score (from 0 being objective to 1 being subjective) of ', subjectivity)
#------------------------------------------------------------------------
# tokens = tokenizer.tokenize(textIn)
# token_ids = tokenizer.convert_tokens_to_ids(tokens)
# input_ids = tokenizer(textIn)
# X_train = [textIn]
# batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
# # batch = torch.tensor(batch["input_ids"])
# with torch.no_grad():
# outputs = model(**batch, labels=torch.tensor([1, 0]))
# predictions = F.softmax(outputs.logits, dim=1)
# labels = torch.argmax(predictions, dim=1)
# labels = [model.config.id2label[label_id] for label_id in labels.tolist()]
# save_directory = "saved"
# tokenizer.save_pretrained(save_directory)
# model.save_pretrained(save_directory)
# tokenizer = AutoTokenizer.from_pretrained(save_directory)
# model = AutoModelForSequenceClassification.from_pretrained(save_directory)
#------------------------------------------------------------------------
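# A minimal sketch (assumptions: `model` and `tokenizer` are the SST-2 pair loaded in the
# Pipeline branch above) of the manual route the commented code attempts: tokenize, run
# the model, softmax the logits, and map the argmax back to a label.
# batch = tokenizer([textIn], padding=True, truncation=True, max_length=512, return_tensors="pt")
# with torch.no_grad():
#     logits = model(**batch).logits
# probs = F.softmax(logits, dim=1)
# label = model.config.id2label[int(torch.argmax(probs, dim=1)[0])]
# st.write('Manual inference result: ', label)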