Spaces:
Sleeping
Sleeping
File size: 3,828 Bytes
7f81307 0147eeb 5e81a63 7179214 593dcd8 5e81a63 593dcd8 7179214 5e81a63 6f318d4 5e81a63 611cacb 5e81a63 743c8db 5e81a63 4a09c2d 5e81a63 4a09c2d 5e81a63 4a09c2d 5e81a63 4a09c2d 593dcd8 c25ad0a 4a09c2d c25ad0a 5e81a63 743c8db 5e81a63 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, pipeline, AutoTokenizer, DistilBertForSequenceClassification, DistilBertTokenizerFast
import pandas as pd
import comments
from random import randint
def predict_cyberbullying_probability(sentence, tokenizer, model):
    """Score a sentence with a sequence-classification model.

    The sentence is tokenized (padded/truncated to 512 tokens), passed through
    ``model`` with gradient tracking disabled, and every output logit is
    mapped through a sigmoid independently.

    Args:
        sentence: Text to classify.
        tokenizer: Callable that, given the text and the keyword options
            below, returns a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        model: Callable taking ``(input_ids, attention_mask=...)`` and
            returning an object exposing a ``logits`` tensor.

    Returns:
        A flat list of floats, one sigmoid probability per logit.
    """
    # Preprocess the input sentence.
    encoded = tokenizer(
        sentence,
        padding='max_length',
        return_token_type_ids=False,
        return_attention_mask=True,
        truncation=True,
        max_length=512,
        return_tensors='pt',
    )
    # Pass the tensors exactly as the tokenizer produced them. Flattening
    # them to 1-D removes the leading batch axis that the model's forward
    # pass expects and only works by accident of broadcasting.
    input_ids = encoded['input_ids']
    attention_mask = encoded['attention_mask']

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    # The logits are flattened here so callers keep receiving a flat list.
    probs = torch.sigmoid(outputs.logits.flatten())
    return probs.numpy().tolist()
# @st.cache
def perform_cyberbullying_analysis(tweet):
    """Build a one-row table of classifier scores for ``tweet``.

    Loads the ``kingsotn/finetuned_cyberbullying`` model and the
    ``distilbert-base-uncased`` fast tokenizer, scores the tweet with
    ``predict_cyberbullying_probability`` and writes one score per column,
    using the module-level ``labels`` list (minus its first entry) as
    column names.

    Args:
        tweet: Text to analyse.

    Returns:
        A ``pandas.DataFrame`` whose ``'comment'`` column holds the tweet and
        whose remaining columns hold the scores.
    """
    # NOTE(review): the spinner's extent is reconstructed; confirm that it is
    # meant to cover inference as well as model loading.
    with st.spinner(text="loading model, wait until spinner ends..."):
        classifier = AutoModelForSequenceClassification.from_pretrained(
            'kingsotn/finetuned_cyberbullying'
        )
        fast_tokenizer = DistilBertTokenizerFast.from_pretrained(
            'distilbert-base-uncased'
        )

        result = pd.DataFrame({'comment': [tweet]})
        scores = predict_cyberbullying_probability(tweet, fast_tokenizer, classifier)

        # labels[0] names the text column, so score columns start at index 1.
        score_columns = labels[1:]
        for position, column in enumerate(score_columns):
            result[column] = scores[position]
    return result
def perform_default_analysis(model_name):
    """Render the text box and submit button, then classify on submit.

    Draws a text area and an "Analyze" form-submit button. Once the button is
    pressed, the tokenizer and model named by ``model_name`` are loaded, a
    ``"sentiment-analysis"`` pipeline is run on the entered text, the raw
    output is shown as JSON, and a success or error banner is displayed
    depending on the top label.

    The script calls this from inside its ``st.form`` block, which the submit
    button relies on.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.

    Returns:
        None. All results are rendered through Streamlit.
    """
    tweet = st.text_area(label="Enter Text:", value="I'm nice at ping pong")
    submitted = st.form_submit_button("Analyze")
    if not submitted:
        # Nothing to analyse yet; avoid loading the model on every rerun.
        return

    # Load lazily, only once the user has asked for an analysis, and keep
    # the spinner visible for both the load and the inference.
    with st.spinner(text="loading..."):
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        clf = pipeline(
            "sentiment-analysis",
            model=model,
            tokenizer=tokenizer,
            framework="pt",
        )
        out = clf(tweet)

    st.json(out)
    # Two positive label spellings are recognised; any other label falls
    # through to the "bad tweet" branch.
    if out[0]["label"] in ("POSITIVE", "POS"):
        st.balloons()
        st.success("nice tweet!")
    else:
        st.error("bad tweet!")
# main -->
# Page header.
st.title("Toxic Tweets Analyzer")
image = "kanye_tweet.jpg"
st.image(image, use_column_width=True)

# Column names for the results table: the first entry is the text column,
# the remaining entries are used as score column names by
# perform_cyberbullying_analysis.
labels = ['comment', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
# toxic_list = st.cache(comments.comments)

with st.form("my_form"):
    # select model
    model_name = st.selectbox(
        "Enter a text and select a pre-trained model to get the sentiment analysis",
        [
            "kingsotn/finetuned_cyberbullying",
            "distilbert-base-uncased-finetuned-sst-2-english",
            "finiteautomata/bertweet-base-sentiment-analysis",
            "distilbert-base-uncased",
        ],
    )
    if model_name == "kingsotn/finetuned_cyberbullying":
        default = "I'm nice at ping pong"
        tweet = st.text_area(label="Enter Text:", value=default)
        submitted = st.form_submit_button("Analyze textbox")
        random = st.form_submit_button("Analyze a random 😈😈😈 tweet")
        if random:
            # Derive the upper bound from the list itself rather than a
            # hard-coded 354, so the pick stays in range if the list changes.
            # Assumes comments.comments is a non-empty sized sequence.
            tweet = comments.comments[randint(0, len(comments.comments) - 1)]
        df = perform_cyberbullying_analysis(tweet)
        # Display the scores table
        st.table(df)
    else:
        perform_default_analysis(model_name)