# sklearn classes referenced by the pickled models
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline, make_pipeline
import joblib
import re
import nltk
import streamlit as st
import functions
from contractions import contractions_dict
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# NLTK resources needed by word_tokenize and the stopword filter
nltk.download('stopwords')
nltk.download('punkt')


# Preprocess function
def preprocess_text(text):
    # Remove URLs
    url_pattern = re.compile(r'https?://\S+')
    text = url_pattern.sub(' ', text)

    # Remove HTML tags
    html_pattern = re.compile(r'<[^<>]+>')
    text = html_pattern.sub(' ', text)

    # Expand contractions (e.g. "don't" -> "do not") before punctuation is stripped
    text = ' '.join(contractions_dict.get(word, word) for word in text.split())

    # Remove punctuation (any character that is not a word character or whitespace;
    # digits and underscores are kept)
    text = re.sub(r'[^\w\s]', ' ', text)

    # Remove emojis
    emoji_pattern = re.compile(
        "["
        "\u200d"                   # zero-width joiner
        "\U000024C2-\U0001F251"    # enclosed characters, dingbats, flags
        "\U0001F300-\U0001F5FF"    # symbols & pictographs
        "\U0001F600-\U0001F64F"    # emoticons
        "\U0001F680-\U0001F6FF"    # transport & map symbols
        "\U0001F926-\U0001F937"    # gesture emojis
        "]+",
        flags=re.UNICODE,
    )
    text = emoji_pattern.sub(' ', text)

    # Convert to lowercase
    text = text.lower()

    # Tokenize and remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text)
    tokens = [token for token in tokens if token not in stop_words]

    # Join tokens back into text
    return ' '.join(tokens)


# Main app: load the pre-trained models and serve predictions via Streamlit
model_NB_path = './model_NB.sav'
model_NB = joblib.load(model_NB_path)  # Naive Bayes model (loaded, not used below)
model_LR_path = './model_LR.sav'
model_LR = joblib.load(model_LR_path)  # Logistic Regression model

text = st.text_area('Enter some text (English only):')
if text:
    out = functions.sentiment_analysis_LR(text)
    out = 'negative' if out == 0 else 'positive'
    st.json(out)
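

# The `functions` module used above is not shown in this file. Below is a
# minimal sketch of the prediction path it presumably implements, assuming
# `model_LR.sav` stores a full sklearn Pipeline (TfidfVectorizer +
# LogisticRegression) that accepts a list of raw strings, and that the model
# outputs 0 for negative and 1 for positive (inferred from how the return
# value is mapped above). The helper name `predict_sentiment` is illustrative
# and not part of the original `functions` module.
def predict_sentiment(raw_text, model=model_LR):
    """Preprocess `raw_text` and map the model's 0/1 output to a label."""
    cleaned = preprocess_text(raw_text)   # reuse the preprocessing defined above
    label = model.predict([cleaned])[0]   # assumption: the Pipeline vectorizes internally
    return 'negative' if label == 0 else 'positive'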