# Streamlit sentiment-analysis demo: TF-IDF + Naive Bayes / Logistic Regression.
# (Removed non-Python scrape residue — viewer status text, git hashes and line
# numbers — that preceded the actual source.)
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
import joblib
import re
import string
import nltk
nltk.download('stopwords')
nltk.download('punkt')
import streamlit as st
# Preprocess function
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Hoisted module-level constants: compiling the regexes and building the
# stopword set once avoids redoing that work on every call (the original
# recompiled all four patterns and rebuilt the set per invocation).
_URL_RE = re.compile(r'https?://\S+')
_HTML_RE = re.compile(r'<[^<>]+>')
_PUNCT_RE = re.compile(r'[^\w\s]')
# Emoji / pictograph code-point ranges. Some ranges overlap in the original
# pattern; they are kept verbatim so matching behavior is unchanged.
_EMOJI_RE = re.compile("["
                       u"\U0001F600-\U0001F64F"
                       u"\U0001F300-\U0001F5FF"
                       u"\U0001F680-\U0001F6FF"
                       u"\U0001F1E0-\U0001F1FF"
                       u"\U0001F1F2-\U0001F1F4"
                       u"\U0001F1E6-\U0001F1FF"
                       u"\U0001F600-\U0001F64F"
                       u"\U00002702-\U000027B0"
                       u"\U000024C2-\U0001F251"
                       u"\U0001f926-\U0001f937"
                       u"\U0001F1F2"
                       u"\U0001F1F4"
                       u"\U0001F620"
                       u"\u200d"
                       u"\u2640-\u2642"
                       "]+", flags=re.UNICODE)
# Built at import time; the nltk 'stopwords' corpus is downloaded above.
_STOP_WORDS = frozenset(stopwords.words('english'))


def preprocess_text(text):
    """Normalize raw English text for the sentiment models.

    Steps (in order, each removal substitutes a space): strip URLs, HTML
    tags, punctuation/digit-adjacent symbols, and emojis; lowercase;
    tokenize with nltk's ``word_tokenize``; drop English stopwords; and
    re-join the surviving tokens with single spaces.

    Parameters
    ----------
    text : str
        Raw input text.

    Returns
    -------
    str
        Cleaned, space-joined token string.
    """
    text = _URL_RE.sub(' ', text)
    text = _HTML_RE.sub(' ', text)
    text = _PUNCT_RE.sub(' ', text)
    text = _EMOJI_RE.sub(' ', text)
    text = text.lower()
    # Tokenize and drop stopwords in one pass.
    tokens = [tok for tok in word_tokenize(text) if tok not in _STOP_WORDS]
    return ' '.join(tokens)
# Load the two pre-trained sentiment models serialized with joblib.
# Each .sav file presumably holds a fitted sklearn Pipeline (TF-IDF
# vectorizer + classifier) — the named_steps lookups below rely on that.
model_NB_path = './model_NB.sav'
model_LR_path = './model_LR.sav'
model_NB = joblib.load(model_NB_path)
model_LR = joblib.load(model_LR_path)
def sentiment_analysis_LR(input):
    """Classify *input* with the Logistic Regression pipeline.

    The text is cleaned with :func:`preprocess_text`, vectorized by the
    pipeline's fitted TF-IDF step, then scored by its LR step.

    Returns 0 for a negative prediction, 1 otherwise.
    """
    cleaned = preprocess_text(input)
    # Pull the fitted steps out of the loaded sklearn pipeline.
    tfidf = model_LR.named_steps['tfidfvectorizer']
    classifier = model_LR.named_steps['logisticregression']
    features = tfidf.transform([cleaned])
    prediction = classifier.predict(features)[0]
    # Collapse the predicted label to a binary 0/1 outcome.
    return 0 if prediction == 0 else 1
def sentiment_analysis_NB(input):
    """Classify *input* with the Naive Bayes pipeline.

    The text is cleaned with :func:`preprocess_text`, vectorized by the
    pipeline's fitted TF-IDF step, then scored by its NB step.

    Returns 0 for a negative prediction, 1 otherwise.
    """
    cleaned = preprocess_text(input)
    # Pull the fitted steps out of the loaded sklearn pipeline.
    tfidf = model_NB.named_steps['tfidf']
    classifier = model_NB.named_steps['nb']
    features = tfidf.transform([cleaned])
    prediction = classifier.predict(features)[0]
    # Collapse the predicted label to a binary 0/1 outcome.
    return 0 if prediction == 0 else 1
# Streamlit UI: read a sentence from the user and show the Logistic
# Regression model's verdict. (The NB model is loaded but unused here.)
text = st.text_area('Enter some text !!! (English text : D )')
if text:
    label = sentiment_analysis_LR(text)
    verdict = 'The sentence is negative' if label == 0 else 'The sentence is positive'
    st.write(verdict)