akuysal committed on
Commit
5b51870
·
1 Parent(s): c54ba2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -8,11 +8,12 @@ from nltk.data import load
8
  import streamlit as st
9
  import sklearn
10
 
 
 
11
  def custom_tokenizer_with_Turkish_stemmer(text):
12
  # tokenize text
13
  # tokens = text.split(" ")
14
- tokenizer = load("turkish.pickle")
15
- tokens = tokenizer.tokenize(text)
16
  print(tokens)
17
  stems = [stemmerTR.stem(item.lower()) for item in tokens]
18
  return stems
 
8
  import streamlit as st
9
  import sklearn
10
 
11
+ trans_table = {ord(c): None for c in string.punctuation + string.digits}
12
+
13
  def custom_tokenizer_with_Turkish_stemmer(text):
14
  # tokenize text
15
  # tokens = text.split(" ")
16
+ tokens = [word for word in nltk.word_tokenize(text.translate(trans_table))]
 
17
  print(tokens)
18
  stems = [stemmerTR.stem(item.lower()) for item in tokens]
19
  return stems