akuysal committed on
Commit
9a63d60
·
1 Parent(s): 634ccb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -1,6 +1,5 @@
1
  from sklearn.feature_extraction.text import TfidfVectorizer
2
  from TurkishStemmer import TurkishStemmer
3
- import nltk
4
  import string
5
  # import for loading python objects (scikit-learn models)
6
  import pickle
@@ -8,8 +7,8 @@ import streamlit as st
8
  import sklearn
9
 
10
  def custom_tokenizer_with_Turkish_stemmer(text):
11
- # my text was unicode so I had to use the unicode-specific translate function. If your documents are strings, you will need to use a different `translate` function here. `Translated` here just does search-replace. See the trans_table: any matching character in the set is replaced with `None`
12
- tokens = [word for word in nltk.word_tokenize(text.translate(trans_table))]
13
  stems = [stemmerTR.stem(item.lower()) for item in tokens]
14
  return stems
15
 
@@ -34,7 +33,6 @@ def predictSMSdata(test_text):
34
  predicted = classifier.predict(tfidf_vectorizer_vectors_test)
35
  print(categories[predicted[0]])
36
 
37
- trans_table = {ord(c): None for c in string.punctuation + string.digits}
38
  stemmerTR = TurkishStemmer()
39
 
40
  text = st.text_area("enter some text!")
 
1
  from sklearn.feature_extraction.text import TfidfVectorizer
2
  from TurkishStemmer import TurkishStemmer
 
3
  import string
4
  # import for loading python objects (scikit-learn models)
5
  import pickle
 
7
  import sklearn
8
 
9
def custom_tokenizer_with_Turkish_stemmer(text):
    """Tokenize *text* on single spaces and return the Turkish stem of
    each lower-cased token.

    Uses the module-level ``stemmerTR`` (a ``TurkishStemmer`` instance).
    Note: this is plain whitespace tokenization — punctuation stays
    attached to its neighboring token.
    """
    # Lower-case each space-separated token, then stem it in one pass.
    return [stemmerTR.stem(token.lower()) for token in text.split(" ")]
14
 
 
33
  predicted = classifier.predict(tfidf_vectorizer_vectors_test)
34
  print(categories[predicted[0]])
35
 
 
36
  stemmerTR = TurkishStemmer()
37
 
38
  text = st.text_area("enter some text!")