Spaces:
Sleeping
Sleeping
File size: 4,657 Bytes
ffc96c9 668f6af ffc96c9 aa2cadb 668f6af b6852b8 668f6af b6852b8 228ca50 668f6af 228ca50 b6852b8 a7fbbb7 b6852b8 a7fbbb7 ffc96c9 b6852b8 228ca50 668f6af e294a0a b6852b8 228ca50 b6852b8 228ca50 b6852b8 228ca50 b6852b8 228ca50 aa2cadb b6852b8 228ca50 c789552 668f6af b6852b8 c789552 aa2cadb 668f6af aa2cadb a7fbbb7 aa2cadb a7fbbb7 aa2cadb 668f6af b6852b8 ffc96c9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import functools

import numpy as np
import pandas as pd
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
# Define global variables
# Model identifier of the fine-tuned checkpoint. It is handed to
# `from_pretrained` through `analyze`, used as a key of the model menu, and
# compared against the user's selection to pick the top-2 table output.
fine_tuned_model = "andyqin18/test-finetuned"
# Number of comments drawn at random from the test CSV for the sample table.
sample_text_num = 10
# Define analyze function
@functools.lru_cache(maxsize=4)
def _load_classifier(model_name: str, top_k: int):
    """Build the classification pipeline for ``model_name``, once per argument pair.

    Loading a model and tokenizer is by far the most expensive step, so the
    resulting pipeline is memoized on ``(model_name, top_k)``. The cache is
    bounded because each entry keeps a full model in memory.

    NOTE(review): the cache is attached to this function object, so it only
    helps for as long as this definition stays alive (e.g. the repeated calls
    of the sample-table loop). If the installed Streamlit provides
    ``st.cache_resource``, using it instead would also persist across reruns.
    """
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, top_k=top_k)


def analyze(model_name: str, text: str, top_k: int = 1) -> list:
    """Run the classifier identified by ``model_name`` on ``text``.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            model and its tokenizer.
        text: The text to classify.
        top_k: Number of highest-scoring labels requested from the pipeline.

    Returns:
        Whatever the pipeline returns for ``text``. Callers in this file index
        it as a list whose entries carry ``'label'`` and ``'score'`` keys; the
        exact nesting depends on ``top_k`` and the transformers version.
    """
    classifier = _load_classifier(model_name, top_k)
    return classifier(text)
# Page header and short introduction
st.title("Sentiment Analysis App - Milestone3")
for intro_line in (
    "This app is to analyze the sentiments behind a text.",
    "You can choose to use my fine-tuned model or pre-trained models.",
):
    st.write(intro_line)

# Model hub: maps each selectable model identifier to the description shown
# under the selector. Insertion order is the order of the menu entries.
model_descrip = {
    fine_tuned_model: (
        "This is a customized BERT-base finetuned model that detects multiple toxicity for a text. "
        "Labels: toxic, severe_toxic, obscene, threat, insult, identity_hate"
    ),
    "distilbert-base-uncased-finetuned-sst-2-english": (
        "This model is a fine-tune checkpoint of DistilBERT-base-uncased, fine-tuned on SST-2. "
        "Labels: POSITIVE; NEGATIVE "
    ),
    "cardiffnlp/twitter-roberta-base-sentiment": (
        "This is a roBERTa-base model trained on ~58M tweets and finetuned for sentiment analysis with the TweetEval benchmark. "
        "Labels: 0 -> Negative; 1 -> Neutral; 2 -> Positive"
    ),
    "finiteautomata/bertweet-base-sentiment-analysis": (
        "Model trained with SemEval 2017 corpus (around ~40k tweets). Base model is BERTweet, a RoBERTa model trained on English tweets. "
        "Labels: POS; NEU; NEG"
    ),
}

# Collect the text to analyze and the model to run it through
user_input = st.text_input("Enter your text:", value="NYU is the better than Columbia.")
user_model = st.selectbox("Please select a model:", model_descrip)

# Show the description that belongs to the currently selected model
selected_description = model_descrip[user_model]
st.write("### Model Description:")
st.write(selected_description)
# Perform analysis and print result

# Column order shared by the single-text table and the sample table.
_TABLE_COLUMNS = [
    "Text",
    "Highest Toxicity Class",
    "Highest Score",
    "Second Highest Toxicity Class",
    "Second Highest Score",
]


def _top_two_row(text, result):
    """Flatten a top-2 classifier result for one text into a table row."""
    best, runner_up = result[0][0], result[0][1]
    return {
        "Text": text,
        "Highest Toxicity Class": best['label'],
        "Highest Score": best['score'],
        "Second Highest Toxicity Class": runner_up['label'],
        "Second Highest Score": runner_up['score'],
    }


# `st.button` is only True during the single rerun triggered by its own click.
# The sample-table button lives inside the "Analyze" branch, so clicking it
# caused a rerun in which "Analyze" was False again and the table could never
# be rendered. Remember which (model, text) pair was analyzed instead; the
# result view stays up until either input changes, as it did before.
if st.button("Analyze"):
    st.session_state["analyzed_request"] = (user_model, user_input)

if st.session_state.get("analyzed_request") != (user_model, user_input):
    st.write("Go on! Try the app!")
elif not user_input:
    st.write("Please enter a text.")
else:
    with st.spinner("Hang on.... Analyzing..."):
        # If fine-tuned
        if user_model == fine_tuned_model:
            # Top 2 labels with highest score
            result = analyze(user_model, user_input, top_k=2)
            st.dataframe(
                pd.DataFrame([_top_two_row(user_input, result)], columns=_TABLE_COLUMNS)
            )

            # 10 Sample Table
            if st.button("Click to generate ten sample analysis"):
                with st.spinner("Hang on.... Analyzing..."):
                    df = pd.read_csv("milestone3/comp/test_comment.csv")
                    # Drop missing comments so that slicing below always sees a string.
                    test_texts = df["comment_text"].dropna().astype(str).values
                    # Sampling without replacement fails if more rows are
                    # requested than exist, so cap the request.
                    sample_size = min(sample_text_num, len(test_texts))
                    sample_texts = np.random.choice(
                        test_texts, size=sample_size, replace=False
                    )
                    rows = []
                    for text in sample_texts:
                        snippet = text[:50]  # only the first 50 characters are analyzed and shown
                        rows.append(
                            _top_two_row(snippet, analyze(fine_tuned_model, snippet, top_k=2))
                        )
                    st.dataframe(pd.DataFrame(rows, columns=_TABLE_COLUMNS))
            else:
                st.write("( ─ ‿ ‿ ─ )")
        else:
            result = analyze(user_model, user_input)
            top = result[0]
            # NOTE(review): depending on the transformers version, a pipeline
            # built with an explicit `top_k` returns either [{...}] or
            # [[{...}]] for a single string. Accept both shapes.
            if isinstance(top, list):
                top = top[0]
            st.write("Result:")
            st.write(f"Label: **{top['label']}**")
            st.write(f"Confidence Score: **{top['score']}**")