Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,22 +1,21 @@
|
|
1 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
2 |
-
from TurkishStemmer import TurkishStemmer
|
3 |
import string
|
4 |
# import for loading python objects (scikit-learn models)
|
5 |
import pickle
|
6 |
import nltk
|
7 |
from nltk.data import load
|
|
|
8 |
import streamlit as st
|
9 |
import sklearn
|
10 |
|
11 |
nltk.download('punkt')
|
12 |
-
trans_table = {ord(c): None for c in string.punctuation + string.digits}
|
13 |
|
14 |
def custom_tokenizer_with_Turkish_stemmer(text):
|
15 |
# tokenize text
|
16 |
# tokens = text.split(" ")
|
17 |
-
tokens = [word for word in nltk.word_tokenize(text
|
18 |
print(tokens)
|
19 |
-
stems = [
|
20 |
return stems
|
21 |
|
22 |
def predictSMSdata(test_text):
|
@@ -24,13 +23,13 @@ def predictSMSdata(test_text):
|
|
24 |
categories.sort()
|
25 |
|
26 |
# load model
|
27 |
-
filename1 = "
|
28 |
file_handle1 = open(filename1, "rb")
|
29 |
classifier = pickle.load(file_handle1)
|
30 |
file_handle1.close()
|
31 |
|
32 |
# load tfidf_vectorizer for transforming test text data
|
33 |
-
filename2 = "
|
34 |
file_handle2 = open(filename2, "rb")
|
35 |
tfidf_vectorizer = pickle.load(file_handle2)
|
36 |
file_handle2.close()
|
@@ -41,10 +40,11 @@ def predictSMSdata(test_text):
|
|
41 |
print(categories[predicted[0]])
|
42 |
return categories[predicted[0]]
|
43 |
|
44 |
-
|
|
|
45 |
|
46 |
# adding the text that will show in the text box
|
47 |
-
default_value = "
|
48 |
text = st.text_area("enter some text!", default_value)
|
49 |
if text:
|
50 |
out = predictSMSdata(text)
|
|
|
1 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
2 |
import string
|
3 |
# import for loading python objects (scikit-learn models)
|
4 |
import pickle
|
5 |
import nltk
|
6 |
from nltk.data import load
|
7 |
+
from nltk.stem import PorterStemmer
|
8 |
import streamlit as st
|
9 |
import sklearn
|
10 |
|
11 |
nltk.download('punkt')
|
|
|
12 |
|
13 |
def custom_tokenizer_with_Turkish_stemmer(text):
    """Tokenize *text* with NLTK and return the lower-cased stem of each token.

    NOTE: despite the name, stemming is done by the module-level
    ``stemmerEN`` (an English ``PorterStemmer``), not by a Turkish stemmer.
    The name is presumably kept because the pickled TF-IDF vectorizer refers
    to its tokenizer by this name -- confirm before renaming.

    Args:
        text: Raw message text to tokenize.

    Returns:
        A list containing one stem per token from ``nltk.word_tokenize``.
    """
    # word_tokenize already returns a list, so no extra copy is needed.
    tokens = nltk.word_tokenize(text)
    # Debug output: the raw tokens before lower-casing and stemming.
    print(tokens)
    return [stemmerEN.stem(token.lower()) for token in tokens]
|
20 |
|
21 |
def predictSMSdata(test_text):
|
|
|
23 |
categories.sort()
|
24 |
|
25 |
# load model
|
26 |
+
filename1 = "LinearSVC_SMS_spam_EN.pickle"
|
27 |
file_handle1 = open(filename1, "rb")
|
28 |
classifier = pickle.load(file_handle1)
|
29 |
file_handle1.close()
|
30 |
|
31 |
# load tfidf_vectorizer for transforming test text data
|
32 |
+
filename2 = "tfidf_vectorizer_EN.pickle"
|
33 |
file_handle2 = open(filename2, "rb")
|
34 |
tfidf_vectorizer = pickle.load(file_handle2)
|
35 |
file_handle2.close()
|
|
|
40 |
print(categories[predicted[0]])
|
41 |
return categories[predicted[0]]
|
42 |
|
43 |
+
# Porter Stemmer for English
|
44 |
+
stemmerEN = PorterStemmer()
|
45 |
|
46 |
# adding the text that will show in the text box
|
47 |
+
default_value = "ASKED 3MOBILE IF 0870 CHATLINES INCLU IN FREE MINS. INDIA CUST SERVs SED YES. L8ER GOT MEGA BILL. 3 DONT GIV A SHIT. BAILIFF DUE IN DAYS. I O £250 3 WANT £800"
|
48 |
text = st.text_area("enter some text!", default_value)
|
49 |
if text:
|
50 |
out = predictSMSdata(text)
|