vjdevane committed on
Commit
a7fa880
·
verified ·
1 Parent(s): 98ec8d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -2
app.py CHANGED
@@ -1,4 +1,190 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- x = st.slider('Select a value')
4
- st.write(x, 'squared is', x * x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import joblib
3
+ import pickle
4
+ import pandas as pd
5
+ import nltk
6
+ #nltk.download('stopwords')
7
+ from nltk.corpus import stopwords
8
+ #nltk.download('punkt')
9
+ from nltk.tokenize import punkt
10
+ #nltk.download('wordnet')
11
+ from nltk.corpus.reader import wordnet
12
+ #nltk.download('WordNetLemmatizer')
13
+ from nltk.stem import WordNetLemmatizer
14
+ from sklearn.feature_extraction.text import TfidfVectorizer
15
+ import spacy
16
+ import matplotlib as plt
17
 
18
+
19
# Maps each Bloom's taxonomy label shown to the user to the integer class id
# returned by the models' predict() (presumably the training-time label
# encoding -- confirm against the training notebook).
_CATEGORY_CODES = {
    'BT1 - Knowledge': 0,
    'BT2 - Comprehension': 1,
    'BT3 - Application': 2,
    'BT4 - Analysis': 3,
    'BT5 - Evaluation': 4,
    'BT6 - Creation': 5,
}

# Characters stripped from the question before lemmatization.
_PUNCTUATION_SIGNS = "?:!.,;"

# (display name, pickle path) for every classifier, in the order used both for
# the results table and for tie-breaking when two models are equally confident.
_MODEL_FILES = (
    ('Logistic Regression', 'best_lrc.pickle'),
    ('Multinomial Naive Bayes', 'best_mnbc.pickle'),
    ('Gradient Boosting Classifier', 'best_gbc.pickle'),
    ('Random Forest Classifier', 'best_rfc.pickle'),
    ('k-Nearest Neighbors', 'best_knnc.pickle'),
    ('Support Vector Machine', 'best_svc.pickle'),
)

# CSS injected through st.markdown to hide Streamlit's main menu and footer.
_HIDE_STREAMLIT_STYLE = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""


def _load_pickle(path):
    """Unpickle and return the object stored in the file at *path*."""
    # pickle.load can execute arbitrary code: these files must be trusted
    # artifacts shipped with the app, never user uploads.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _get_category_name(category_id):
    """Return the label whose code equals *category_id*, or None if unknown."""
    return next(
        (name for name, code in _CATEGORY_CODES.items() if code == category_id),
        None,
    )


def _clean_question(text, stop_words):
    """Return *text* normalised for the TF-IDF vectorizer.

    Steps, in order: line breaks become spaces, double quotes are dropped,
    the text is lower-cased, the characters in ``_PUNCTUATION_SIGNS`` and
    possessive ``'s`` are removed, every space-separated word is lemmatized
    as a verb, and each entry of *stop_words* is removed as a whole word.
    """
    import re  # local import: the module-level import block does not provide it

    cleaned = text.replace("\r", " ").replace("\n", " ")
    # NOTE(review): the reviewed source shows a space replaced by a space here,
    # which is a no-op; the literal was probably a run of several spaces that
    # got collapsed when the page was rendered -- confirm against the real file.
    cleaned = cleaned.replace(" ", " ")
    cleaned = cleaned.replace('"', '')
    cleaned = cleaned.lower()

    # Plain literal removal: "?" and "." must not be read as regex operators.
    for punct_sign in _PUNCTUATION_SIGNS:
        cleaned = cleaned.replace(punct_sign, '')
    cleaned = cleaned.replace("'s", "")

    lemmatizer = WordNetLemmatizer()
    cleaned = " ".join(
        lemmatizer.lemmatize(word, pos="v") for word in cleaned.split(" ")
    )

    # Explicit regex substitution so whole-word matching does not depend on the
    # installed pandas version's default for Series.str.replace(regex=...).
    # Stop words are removed one at a time, in list order.
    for stop_word in stop_words:
        cleaned = re.sub(r"\b" + re.escape(stop_word) + r"\b", '', cleaned)

    return cleaned


def _predict_with_all_models(features):
    """Score *features* with every model listed in ``_MODEL_FILES``.

    Returns a 3-tuple ``(category, best_probability, probabilities)``:
    ``probabilities`` holds each model's highest class probability as a
    percentage, in ``_MODEL_FILES`` order; ``best_probability`` is the largest
    of those; ``category`` is the label predicted by the model that produced
    it (the earliest model wins a tie).
    """
    categories = []
    probabilities = []
    for _, path in _MODEL_FILES:
        model = _load_pickle(path)
        categories.append(_get_category_name(model.predict(features)[0]))
        # Every model is scored with its own predict_proba.
        probabilities.append(model.predict_proba(features)[0].max() * 100)

    best = max(range(len(probabilities)), key=probabilities.__getitem__)
    return categories[best], probabilities[best], probabilities


def main():
    """Render the Blooms Taxonomy classifier Streamlit app.

    The sidebar switches between a "Prediction" page, which classifies a
    question typed by the user with six pickled models and reports the most
    confident one, and an "About" page.
    """
    st.title("Blooms Taxonomy Classifier")
    st.subheader("ML App for Blooms Taxonomy Level Prediction")

    activities = ["Prediction", "About"]
    choice = st.sidebar.selectbox("Choose Activity", activities)

    if choice == "Prediction":
        # Presumably a fitted TfidfVectorizer; only .transform() is used here.
        tfidf = _load_pickle("tfidf.pickle")
        stop_words = list(stopwords.words('english'))

        st.info("Prediction with Various Models")

        bt_text = st.text_area("Question to Predict", "Typer Here")

        if st.button("Classify"):
            st.text("Original Text ::\n{}".format(bt_text))

            # Build the feature matrix once and share it across all models.
            cleaned = _clean_question(bt_text, stop_words)
            features = tfidf.transform([cleaned]).toarray()
            category, best_probability, probabilities = (
                _predict_with_all_models(features)
            )

            st.success("Blooms Taxonomy Level :: {}".format(category))
            st.success("Maximum Probability :: {}".format(best_probability))
            st.write("Performance of Various Algorithms")

            # The column is labelled 'Maximum Accuracy' but holds each model's
            # top class probability (in percent) for this single question.
            data = pd.DataFrame({
                'Various Algorithm': [name for name, _ in _MODEL_FILES],
                'Maximum Accuracy': probabilities,
            }).set_index('Various Algorithm')

            st.write(data)
            st.bar_chart(data)

    if choice == "About":
        st.success("This web app is developed by Vijay Devane.")

    # NOTE(review): indentation was not recoverable from the reviewed source;
    # this assumes the style override applies on every page rather than only
    # inside the "About" branch -- confirm against the real file.
    st.markdown(_HIDE_STREAMLIT_STYLE, unsafe_allow_html=True)
188
+
189
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()