Spaces:
Runtime error
Runtime error
File size: 1,436 Bytes
4b10007 e318707 c54ba2f 6f7b5ee 07dd827 4b10007 9a63d60 c83f950 4ae52b2 c83f950 4b10007 61d120e 4b10007 6f7b5ee 8afa1c0 6f7b5ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
from sklearn.feature_extraction.text import TfidfVectorizer
from TurkishStemmer import TurkishStemmer
import string
# import for loading python objects (scikit-learn models)
import pickle
import nltk
from nltk.data import load
import streamlit as st
import sklearn
def custom_tokenizer_with_Turkish_stemmer(text):
    """Split *text* into tokens and return their lower-cased Turkish stems.

    The tokenizer object is obtained through ``nltk.data.load`` from the
    resource named ``"turkish.pickle"``; stemming is delegated to the
    module-level ``stemmerTR`` instance.

    Args:
        text: The raw string to tokenize.

    Returns:
        A list with one stem per token, in token order.
    """
    # NOTE(review): presumably the pickled TF-IDF vectorizer refers to this
    # function by name, so it must stay defined under this exact name.
    pieces = load("turkish.pickle").tokenize(text)
    # Echo the raw tokens to stdout (shows up in the app logs).
    print(pieces)
    stemmed = []
    for piece in pieces:
        stemmed.append(stemmerTR.stem(piece.lower()))
    return stemmed
def _load_pickle(path):
    """Unpickle and return the single object stored in the file at *path*."""
    # The context manager closes the file even when unpickling raises.
    # SECURITY: pickle.load can execute arbitrary code; only use it on
    # model files that ship with the application, never on user uploads.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def predictSMSdata(test_text):
    """Classify a single SMS message as ``"legitimate"`` or ``"spam"``.

    The classifier and the TF-IDF vectorizer are unpickled from
    ``LinearSVC_SMS_spam_TR.pickle`` and ``tfidf_vectorizer_TR.pickle``
    (paths relative to the current working directory) on every call.

    Args:
        test_text: The message text to classify.

    Returns:
        The predicted category name.

    Raises:
        OSError: If either pickle file cannot be opened.
    """
    # Sorted so that the class index returned by the model maps onto the
    # names alphabetically (assumes the model predicts integer indices
    # 0/1 in that order -- TODO confirm against the training script).
    categories = sorted(["legitimate", "spam"])

    # Load the model first, then the vectorizer used to transform the text.
    classifier = _load_pickle("LinearSVC_SMS_spam_TR.pickle")
    tfidf_vectorizer = _load_pickle("tfidf_vectorizer_TR.pickle")

    # The vectorizer works on a collection of documents, so wrap the text.
    features = tfidf_vectorizer.transform([test_text])
    predicted = classifier.predict(features)

    label = categories[predicted[0]]
    print(label)
    return label
# Module-level stemmer instance read by custom_tokenizer_with_Turkish_stemmer.
stemmerTR = TurkishStemmer()

# Streamlit UI: read the message from a text area and, once it is
# non-empty, display the predicted category in upper case.
text = st.text_area("enter some text!")
if text:
    out = predictSMSdata(text)
    message = "The category of SMS = " + out.upper()
    st.write(message)
|