akuysal committed on
Commit
9a63d60
·
1 Parent(s): 634ccb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -1,6 +1,5 @@
1
  from sklearn.feature_extraction.text import TfidfVectorizer
2
  from TurkishStemmer import TurkishStemmer
3
- import nltk
4
  import string
5
  # import for loading python objects (scikit-learn models)
6
  import pickle
@@ -8,8 +7,8 @@ import streamlit as st
8
  import sklearn
9
 
10
  def custom_tokenizer_with_Turkish_stemmer(text):
11
- # my text was unicode so I had to use the unicode-specific translate function. If your documents are strings, you will need to use a different `translate` function here. `Translated` here just does search-replace. See the trans_table: any matching character in the set is replaced with `None`
12
- tokens = [word for word in nltk.word_tokenize(text.translate(trans_table))]
13
  stems = [stemmerTR.stem(item.lower()) for item in tokens]
14
  return stems
15
 
@@ -34,7 +33,6 @@ def predictSMSdata(test_text):
34
  predicted = classifier.predict(tfidf_vectorizer_vectors_test)
35
  print(categories[predicted[0]])
36
 
37
- trans_table = {ord(c): None for c in string.punctuation + string.digits}
38
  stemmerTR = TurkishStemmer()
39
 
40
  text = st.text_area("enter some text!")
 
1
  from sklearn.feature_extraction.text import TfidfVectorizer
2
  from TurkishStemmer import TurkishStemmer
 
3
  import string
4
  # import for loading python objects (scikit-learn models)
5
  import pickle
 
7
  import sklearn
8
 
9
def custom_tokenizer_with_Turkish_stemmer(text):
    """Tokenize *text* on single spaces and return the Turkish stem of
    each lower-cased token.

    Uses the module-level ``stemmerTR`` (a ``TurkishStemmer`` instance).
    Note: this is plain whitespace tokenization — punctuation stays
    attached to its neighboring token.
    """
    # Lower-case each space-separated token, then stem it in one pass.
    return [stemmerTR.stem(token.lower()) for token in text.split(" ")]
14
 
 
33
  predicted = classifier.predict(tfidf_vectorizer_vectors_test)
34
  print(categories[predicted[0]])
35
 
 
36
  stemmerTR = TurkishStemmer()
37
 
38
  text = st.text_area("enter some text!")