perman2011 committed on
Commit
835b4d7
·
1 Parent(s): e224ad1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.nn import functional as F
4
+ import torch.optim as optim
5
+ import pandas as pd
6
+ import numpy as np
7
+ import seaborn as sns
8
+ import matplotlib.pyplot as plt
9
+
10
+ from sklearn.model_selection import train_test_split
11
+ from sklearn.model_selection import KFold
12
+ from sklearn.model_selection import GridSearchCV
13
+ from sklearn.ensemble import RandomForestClassifier
14
+ from sklearn.feature_extraction.text import TfidfVectorizer
15
+ from sklearn.naive_bayes import MultinomialNB
16
+ from sklearn.linear_model import LogisticRegression
17
+ from sklearn.metrics import accuracy_score, classification_report
18
+ from sklearn.metrics import roc_curve, auc, confusion_matrix
19
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
20
+ from sklearn.pipeline import make_pipeline
21
+ from sklearn.pipeline import Pipeline
22
+ import joblib
23
+
24
+ import re
25
+ import string
26
+ import nltk
27
# Fetch the NLTK corpora used below: the English stop-word list and the
# Punkt tokenizer data needed by word_tokenize.
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases load the tokenizer tables from 'punkt_tab' rather than
# the pickled 'punkt' package; without it word_tokenize raises LookupError.
# On installations that already have it this call is a no-op.
nltk.download('punkt_tab')
29
+
30
+ import os
31
+ import streamlit as st
32
+
33
+ import functions
34
+
35
+ torch.manual_seed(1)
36
+
37
+ # Preprocess function
38
+ import re
39
+ from contractions import contractions_dict
40
+ from nltk.corpus import stopwords
41
+ from nltk.tokenize import word_tokenize
42
+
43
def preprocess_text(text):
    """Clean a raw string and return it as space-joined, lowercased tokens.

    The steps run in this order:

    1. ``http://`` / ``https://`` URLs are replaced by a space.
    2. Anything of the form ``<...>`` (HTML-like tags) is replaced by a space.
    3. Each whitespace-separated word is looked up in ``contractions_dict``
       exactly as written; words without an entry are kept unchanged.
    4. Every character that is neither a word character nor whitespace is
       replaced by a space. Digits and underscores are word characters, so
       they are kept.
    5. Runs of characters from the emoji character class are replaced by a
       space.
    6. The text is lowercased.
    7. The text is tokenized with ``word_tokenize`` and English stop words
       are dropped.

    Args:
        text: The string to clean.

    Returns:
        The remaining tokens joined by single spaces.
    """
    # Strip URLs before tags so a URL inside an attribute is gone either way.
    without_urls = re.sub(r'https?://\S+', ' ', text)
    without_tags = re.sub(r'<[^<>]+>', ' ', without_urls)

    # NOTE(review): the lookup happens before lowercasing and before
    # punctuation is stripped, so it only hits words that match a key
    # character-for-character -- confirm that is intended.
    expanded = ' '.join(
        contractions_dict.get(word, word) for word in without_tags.split()
    )

    # Only non-word, non-space characters are blanked out here.
    cleaned = re.sub(r'[^\w\s]', ' ', expanded)

    # NOTE(review): the \U000024C2-\U0001F251 range is very wide and also
    # covers many non-emoji code points (for example CJK ideographs).
    emoji_class = (
        u"\U0001F600-\U0001F64F"
        u"\U0001F300-\U0001F5FF"
        u"\U0001F680-\U0001F6FF"
        u"\U0001F1E0-\U0001F1FF"
        u"\U0001F1F2-\U0001F1F4"
        u"\U0001F1E6-\U0001F1FF"
        u"\U0001F600-\U0001F64F"
        u"\U00002702-\U000027B0"
        u"\U000024C2-\U0001F251"
        u"\U0001f926-\U0001f937"
        u"\U0001F1F2"
        u"\U0001F1F4"
        u"\U0001F620"
        u"\u200d"
        u"\u2640-\u2642"
    )
    cleaned = re.sub("[" + emoji_class + "]+", ' ', cleaned, flags=re.UNICODE)

    lowered = cleaned.lower()

    # A set gives constant-time membership checks while filtering.
    english_stop_words = set(stopwords.words('english'))
    kept_tokens = filter(
        lambda token: token not in english_stop_words,
        word_tokenize(lowered),
    )
    return ' '.join(kept_tokens)
90
+
91
# Main function
# Load the persisted Naive Bayes and Logistic Regression models.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model_NB_path = './model_NB.sav'
model_NB = joblib.load(model_NB_path)

model_LR_path = './model_LR.sav'
model_LR = joblib.load(model_LR_path)


# Streamlit re-runs this script on every interaction; nothing is predicted
# until the user has typed something.
text = st.text_area('Enter some text !!! (English text : D )')
if text:
    out = functions.sentiment_analysis_LR(text)
    # A result of 0 is reported as negative, anything else as positive.
    out = 'negative' if out == 0 else 'positive'
    # st.json treats a str argument as already-serialized JSON, and a bare
    # word such as "negative" is not valid JSON, so pass a dict instead.
    st.json({'sentiment': out})