import functools
import re

import nltk
import numpy as np
import pandas as pd
import streamlit as st
import tensorflow_hub as tf_hub
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model

# Download the NLTK data packages used by the tokenizer, the stop-word list
# and the lemmatizer.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
|
|
|
|
# Load the saved Keras model from the 'model_rnn' path when the module is loaded.
loaded_model = load_model('model_rnn')
|
|
|
|
# Maps the class index chosen from the model's prediction to its category name.
label_dict = {
    0: 'Uang Masuk',
    1: 'Uang Keluar',
    2: 'Pinjaman',
    3: 'Tagihan',
    4: 'Top Up',
    5: 'Biaya & Lainnya',
    6: 'Transportasi',
    7: 'Pendidikan',
    8: 'Hadiah & Amal',
    9: 'Belanja',
    10: 'Hiburan',
    11: 'Makanan & Minuman',
    12: 'Kesehatan',
    13: 'Perawatan Diri',
    14: 'Hobi & Gaya Hidup',
    15: 'Pencairan Investasi',
    16: 'Tabungan & Investasi',
}
|
|
@functools.lru_cache(maxsize=1)
def _stop_words():
    """Return the stop-word set used by preprocessing(), built once and cached."""
    words = set(stopwords.words('indonesian'))
    # NOTE(review): 'nyagak' looks like it may be 'nya' and 'gak' run together.
    # Left unchanged because the cleaning presumably has to match what the
    # model saw during training -- confirm before editing this list.
    words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', "and"])
    # Frozen so callers cannot mutate the cached set.
    return frozenset(words)


def preprocessing(text):
    """Clean a raw note before it is passed to the model.

    Steps, in order: lowercase; remove digits; remove every character that is
    neither a word character nor whitespace; collapse runs of whitespace and
    trim; tokenize; drop stop words; lemmatize; join with single spaces.

    Args:
        text: Raw note text (a ``str``).

    Returns:
        The cleaned text as one space-separated string. It is empty when no
        token survives the cleaning.
    """
    text = text.lower()

    # Normalization: digits out, non-word/non-space characters out, whitespace squeezed.
    text = re.sub(r'\d+', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()

    tokens = word_tokenize(text)

    stop_words = _stop_words()
    lemmatizer = WordNetLemmatizer()
    kept = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]

    return ' '.join(kept)
|
|
def run():
    """Render the Streamlit page that classifies a free-text note.

    Shows a text area pre-filled with a sample note and a 'Predict' button.
    When the button is pressed the note is cleaned with ``preprocessing``,
    passed to ``loaded_model`` and the highest-scoring class is written to
    the page as a category name from ``label_dict``.
    """
    st.title('Notes Categorization')

    default = "konser twice"

    # Streamlit documents 68 px as the minimum height for st.text_area and
    # recent releases reject smaller values; older ones accept 68 as well.
    user_input = st.text_area("Enter the notes text here:", default, height=68)

    if not st.button('Predict'):
        return

    text_processed = preprocessing(user_input)

    # Digits, punctuation and stop words are all stripped, so the cleaned text
    # can end up empty; do not ask the model to classify an empty string.
    if not text_processed:
        st.warning('Nothing left to classify after cleaning the text. '
                   'Please enter a more descriptive note.')
        return

    # Add a leading axis so the single note is passed as a one-element batch.
    preprocessed_notes = np.expand_dims(text_processed, axis=0)

    predictions = loaded_model.predict(preprocessed_notes)

    # argmax yields a numpy integer; convert to a plain int for the dict lookup.
    predicted_class = int(np.argmax(predictions[0]))
    predicted_category = label_dict[predicted_class]

    st.write(f'The predicted category is: {predicted_category}')
|
|
# Start the Streamlit page when this file is executed as a script.
if __name__ == '__main__':
    run()