didev007 committed on
Commit
24c0483
·
verified ·
1 Parent(s): 34030df

Delete prediction.py

Browse files
Files changed (1) hide show
  1. prediction.py +0 -82
prediction.py DELETED
@@ -1,82 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- from tensorflow.keras.models import load_model
5
- import re
6
- import nltk
7
- nltk.download('punkt')
8
- from nltk.tokenize import word_tokenize
9
- nltk.download('stopwords')
10
- from nltk.corpus import stopwords
11
- nltk.download('wordnet')
12
- from nltk.stem import WordNetLemmatizer
13
-
14
# Load the trained RNN text-classification model from disk.
loaded_model = load_model('model_rnn')

# Map each integer class index (the model's output) to its category name.
# Indices follow the training-label encoding, so order matters.
label_dict = dict(enumerate([
    'Uang Masuk', 'Uang Keluar', 'Pinjaman', 'Tagihan', 'Top Up',
    'Biaya & Lainnya', 'Transportasi', 'Pendidikan', 'Hadiah & Amal',
    'Belanja', 'Hiburan', 'Makanan & Minuman', 'Kesehatan',
    'Perawatan Diri', 'Hobi & Gaya Hidup', 'Pencairan Investasi',
    'Tabungan & Investasi',
]))
23
-
24
def preprocessing(text):
    """Clean raw note text into the form the model was trained on.

    Steps: lowercase; strip digits, punctuation, and extra whitespace;
    tokenize; drop Indonesian stopwords plus a few informal/English
    extras; lemmatize; rejoin tokens into a single space-separated string.
    """
    # Lowercase, then normalize in three passes: drop digits, drop
    # punctuation, collapse runs of whitespace to single spaces.
    cleaned = text.lower()
    for pattern, replacement in ((r'\d+', ''), (r'[^\w\s]', ''), (r'\s+', ' ')):
        cleaned = re.sub(pattern, replacement, cleaned)
    cleaned = cleaned.strip()

    # Stopword set: Indonesian core list plus custom informal/English noise.
    stop_words = set(stopwords.words('indonesian'))
    stop_words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', "and"])

    # Tokenize, filter stopwords, and lemmatize the surviving tokens.
    lemmatizer = WordNetLemmatizer()
    tokens = [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(cleaned)
        if token not in stop_words
    ]

    return ' '.join(tokens)
54
-
55
def run():
    """Render the Streamlit page and classify the note the user enters."""
    st.title('Notes Categorization')

    user_input = st.text_area("Enter the notes text here:", "konser twice", height=50)

    # Only predict once the user presses the button.
    if not st.button('Predict'):
        return

    # Clean the raw note exactly as the training data was cleaned.
    processed_text = preprocessing(user_input)

    # The model expects a batch, so wrap the single sample along axis 0.
    batch = np.expand_dims(processed_text, axis=0)

    # Run inference and take the class with the highest probability.
    predictions = loaded_model.predict(batch)
    predicted_class = np.argmax(predictions[0])

    # Decode the class index into its human-readable category.
    predicted_category = label_dict[predicted_class]

    st.write(f'The predicted category is: {predicted_category}')
80
-
81
if __name__ == '__main__':
    # Bug fix: the script's entry point is run(); the original called the
    # undefined name main(), which raised NameError when executed directly.
    run()