Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- pages/2_π_QuickML.py +839 -0
- pages/3_π_StudyML.py +0 -0
- pages/4_π_About.py +125 -0
pages/2_π_QuickML.py
ADDED
@@ -0,0 +1,839 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import PIL.Image as Image
|
5 |
+
|
6 |
+
# Config
page_icon = Image.open("./assets/icon.png")
st.set_page_config(layout="centered", page_title="Click ML", page_icon=page_icon)

# Default value for every session-state key this page reads. A key is only
# seeded when it is absent, so values written during an earlier rerun survive.
_SESSION_DEFAULTS = {
    "df": None,
    "delete_features": None,
    "missing_done": False,
    "cat_enc_done": False,
    "num_scale_done": False,
    "split_done": False,
    "X_train": None,
    "X_test": None,
    "y_train": None,
    "y_test": None,
    "X_val": None,
    "y_val": None,
    "split_type": None,
    "build_model_done": False,
    "no_svm": False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
|
54 |
+
|
55 |
+
def new_line():
    """Write a single newline string to the page (used as a vertical spacer)."""
    st.write("\n")
|
57 |
+
|
58 |
+
# Sidebar artwork.
# NOTE(review): indentation was not recoverable here; this assumes only the
# image belongs to the sidebar and the header/uploader live on the main page.
st.sidebar.image("./assets/sb-quick.png", use_column_width=True)

# Page title and one-line description.
title_html = "<h1 style='text-align: center; '>π QuickML</h1>"
intro_text = "QuickML is a tool that helps you to build a Machine Learning model in just a few clicks."
st.markdown(title_html, unsafe_allow_html=True)
st.markdown(intro_text, unsafe_allow_html=True)
st.divider()

# CSV upload widget; the rest of the page is driven by `uploaded_file`.
st.header("Upload Your CSV File", anchor=False)
uploaded_file = st.file_uploader("Upload Your CSV File", type=["csv"])
st.divider()
|
70 |
+
|
71 |
+
if uploaded_file:
|
72 |
+
|
73 |
+
# Read the CSV File
# Reuse the frame kept in session state when there is one; otherwise parse the upload.
stored_df = st.session_state.df
df = pd.read_csv(uploaded_file) if stored_df is None else stored_df
new_line()

# The Dataset
st.subheader("π¬ Dataset", anchor=False)
new_line()
st.dataframe(df, use_container_width=True)
new_line()

# Delete Features from the dataset
st.subheader("ποΈ Delete Features", anchor=False)
new_line()
if not st.session_state.delete_features:
    delete_features = st.multiselect(
        "Select the features you want to delete from the dataset",
        df.columns.tolist(),
    )
    new_line()
    if delete_features:
        # The narrow middle column keeps the button centred.
        col1, col2, col3 = st.columns([1, 0.5, 1])
        apply_clicked = col2.button("Apply", key="delete")
        if apply_clicked:
            st.session_state.delete_features = True
            # Only the session copy loses the columns; `df` is refreshed from it on the next rerun.
            st.session_state.df = df.drop(columns=delete_features)

if st.session_state.delete_features:
    st.success("Features deleted successfully. You can now proceed to Handling Missing Values.")
|
101 |
+
|
102 |
+
# Missing Values
# Offers one imputation strategy for numerical columns and one for categorical
# columns, applies both when "Apply" is pressed, and stores the result in
# st.session_state.df. Sets st.session_state.missing_done once handled (or
# immediately when the frame has no missing values).
st.subheader("β οΈ Missing Values", anchor=False)

# Computed up front so the Apply branch never depends on conditionally bound names.
num_feat = df.select_dtypes(include=np.number).columns.tolist()
# "object" instead of np.object: that alias was removed in NumPy 1.24.
cat_feat = df.select_dtypes(include="object").columns.tolist()

if df.isnull().values.any():
    new_line()
    new_line()

    col1, col2 = st.columns(2)
    col1.markdown("<h6 style='text-align: center; '>Handling Numerical Features</h6>", unsafe_allow_html=True)
    col1.write("\n")
    missing_num_meth = col1.selectbox(
        "Select the method to handle missing values in numerical features",
        ["Mean", "Median", "Mode", "ffil and bfil", "Drop the rows"],
    )

    col2.markdown("<h6 style='text-align: center; '>Handling Categorical Features</h6>", unsafe_allow_html=True)
    col2.write("\n")
    missing_cat_meth = col2.selectbox(
        "Select the method to handle missing values in categorical features",
        ["Mode", "Drop the rows"],
    )

    new_line()

    if missing_num_meth and missing_cat_meth:
        cola, colb, colc = st.columns([1, 0.5, 1])
        if colb.button("Apply", key="missing"):
            st.session_state.missing_done = True

            # Selectbox label -> SimpleImputer strategy name.
            imputer_strategy = {"Mean": "mean", "Median": "median", "Mode": "most_frequent"}

            # If Numerical Features are present
            if num_feat:
                if missing_num_meth in imputer_strategy:
                    from sklearn.impute import SimpleImputer
                    imputer = SimpleImputer(missing_values=np.nan, strategy=imputer_strategy[missing_num_meth])
                    df[num_feat] = imputer.fit_transform(df[num_feat])

                elif missing_num_meth == "ffil and bfil":
                    # Forward fill first, then backward fill whatever is still missing at the top.
                    df[num_feat] = df[num_feat].ffill().bfill()

                elif missing_num_meth == "Drop the rows":
                    # dropna(inplace=True) on df[num_feat] only altered a temporary
                    # copy; rebinding df actually removes the rows.
                    df = df.dropna(subset=num_feat)

            # If Categorical Features are present
            if cat_feat:
                if missing_cat_meth == "Mode":
                    from sklearn.impute import SimpleImputer
                    imputer = SimpleImputer(missing_values=np.nan, strategy="most_frequent")
                    df[cat_feat] = imputer.fit_transform(df[cat_feat])

                elif missing_cat_meth == "Drop the rows":
                    df = df.dropna(subset=cat_feat)

            st.session_state.df = df
            st.success("Missing values handled successfully. You can now proceed to Encoding Categorical Features.")
else:
    st.session_state.missing_done = True
    st.success("No missing values found in the dataset.")
|
173 |
+
|
174 |
+
|
175 |
+
# Encoding Categorical Features
# Shown once missing values are handled. Encodes every object-dtype column with
# the chosen method, stores the frame in st.session_state.df and sets
# st.session_state.cat_enc_done (also set when there is nothing to encode).
if st.session_state.missing_done:
    new_line()
    st.subheader("β’οΈ Encoding Categorical Features", anchor=False)
    new_line()

    # "object" instead of np.object: that alias was removed in NumPy 1.24.
    cat_cols = df.select_dtypes(include="object").columns.tolist()

    if cat_cols:
        new_line()

        st.markdown("<h6 style='text-align: center; '>Select the method to encode categorical features</h6>", unsafe_allow_html=True)
        new_line()
        cat_enc_meth = st.selectbox(
            "Select the method to encode categorical features",
            ["Ordinal Encoding", "One Hot Encoding", "Count Frequency Encoding"],
        )
        new_line()

        if cat_enc_meth:
            col1, col2, col3 = st.columns([1, 0.5, 1])
            if col2.button("Apply", key="cat_enc"):
                st.session_state.cat_enc_done = True

                if cat_enc_meth == "Ordinal Encoding":
                    from sklearn.preprocessing import OrdinalEncoder
                    oe = OrdinalEncoder()
                    df[cat_cols] = oe.fit_transform(df[cat_cols])

                elif cat_enc_meth == "One Hot Encoding":
                    df = pd.get_dummies(df, columns=cat_cols)

                elif cat_enc_meth == "Count Frequency Encoding":
                    # Replace each category with its count divided by the total number of rows.
                    for col in cat_cols:
                        df[col] = df[col].map(df[col].value_counts() / len(df))

                st.session_state.df = df
                st.success("Categorical features encoded successfully. You can now proceed to Scaling & Transformation.")

    else:
        st.session_state.cat_enc_done = True
        st.success("No categorical features found in the dataset.")
|
216 |
+
|
217 |
+
# Scaling & Transforming Numerical Features
# Shown once missing values and categorical encoding are done. Applies the
# chosen scaler/transform to every numeric column the user did not exclude,
# stores the frame in st.session_state.df and sets st.session_state.num_scale_done.
if st.session_state.cat_enc_done and st.session_state.missing_done:
    new_line()
    st.subheader("𧬠Scaling & Transformation", anchor=False)
    new_line()

    numeric_cols = df.select_dtypes(include=np.number).columns.tolist()

    if st.session_state.num_scale_done:
        st.success("Numerical features scaled and transformed successfully. You can now proceed to Splitting the dataset.")

    elif not numeric_cols:
        st.warning("No numerical features found in the dataset. There is something wrong with the dataset. Please check it again.")

    else:
        new_line()

        st.markdown("<h6 style='text-align: left; '>Select the method to scale and transform numerical features</h6>", unsafe_allow_html=True)
        new_line()
        col1, col2 = st.columns(2)
        not_scale = col1.multiselect("Select the features you **don't** want to scale and transform **__Include the traget feature if it is Classification problem__**", numeric_cols)
        num_scale_meth = col2.selectbox("Select the method to scale and transform numerical features", ["Standard Scaler", "MinMax Scaler", "Robust Scaler", "Log Transformation", "Square Root Transformation"])
        new_line()

        if num_scale_meth:
            col1, col2, col3 = st.columns([1, 0.5, 1])
            if col2.button("Apply", key="num_scale"):
                # Numeric columns minus the ones the user asked to leave untouched.
                num_cols = [col for col in numeric_cols if col not in not_scale]

                # Selectbox label -> class name in sklearn.preprocessing.
                sklearn_scalers = {
                    "Standard Scaler": "StandardScaler",
                    "MinMax Scaler": "MinMaxScaler",
                    "Robust Scaler": "RobustScaler",
                }

                # np.log / np.sqrt outside their domain would silently store
                # -inf / NaN, which the model-fitting step cannot handle, so
                # refuse instead of corrupting the frame.
                if num_scale_meth == "Log Transformation" and (df[num_cols] <= 0).any().any():
                    st.error("Log Transformation needs strictly positive values. Exclude the features that contain zero or negative values, or choose another method.")

                elif num_scale_meth == "Square Root Transformation" and (df[num_cols] < 0).any().any():
                    st.error("Square Root Transformation needs non-negative values. Exclude the features that contain negative values, or choose another method.")

                else:
                    st.session_state.num_scale_done = True

                    # Nothing to transform when every numeric column was excluded;
                    # the sklearn scalers raise on a zero-column frame.
                    if num_cols:
                        if num_scale_meth in sklearn_scalers:
                            from sklearn import preprocessing
                            scaler = getattr(preprocessing, sklearn_scalers[num_scale_meth])()
                            df[num_cols] = scaler.fit_transform(df[num_cols])

                        elif num_scale_meth == "Log Transformation":
                            df[num_cols] = np.log(df[num_cols])

                        elif num_scale_meth == "Square Root Transformation":
                            df[num_cols] = np.sqrt(df[num_cols])

                    st.session_state.df = df
                    st.success("Numerical features scaled and transformed successfully. You can now proceed to Splitting the dataset.")
|
280 |
+
|
281 |
+
# Splitting the dataset
# Shown once missing values and categorical encoding are done. Splits df into
# train/test (70/30) or train/validation/test (70/15/15) and keeps the pieces
# in session state for the model-building step.
if st.session_state.cat_enc_done and st.session_state.missing_done:
    new_line()
    st.subheader("βοΈ Splitting the dataset", anchor=False)
    new_line()

    if st.session_state.split_done:
        # Already split on an earlier run: just report which layout was used.
        if st.session_state.split_type == "Train, Validation and Test":
            st.success("Dataset split successfully into Training, Validation and Test sets. You can now proceed to Building the model.")
        elif st.session_state.split_type == "Train and Test":
            st.success("Dataset split successfully into Training and Test sets. You can now proceed to Building the model.")

    else:
        new_line()

        col1, col2 = st.columns(2)
        target = col1.selectbox("Select the target variable", df.columns.tolist())
        sets = col2.selectbox("Select the type of split", ["Train and Test", "Train, Validation and Test"])
        st.session_state.split_type = sets
        col1, col2, col3 = st.columns([1, 0.5, 1])
        if col2.button("Apply", key="split"):
            st.session_state.split_done = True

            from sklearn.model_selection import train_test_split
            X = df.drop(target, axis=1)
            y = df[target]

            # Both layouts begin with the same 70/30 cut.
            X_train, X_rem, y_train, y_rem = train_test_split(X, y, test_size=0.3, random_state=42)

            if sets == "Train and Test":
                # The 30% remainder is the test set.
                st.session_state.X_train = X_train
                st.session_state.X_test = X_rem
                st.session_state.y_train = y_train
                st.session_state.y_test = y_rem
                st.success("Dataset split successfully. You can now proceed to Building the model.")

            elif sets == "Train, Validation and Test":
                # The 30% remainder is halved into test and validation sets.
                X_test, X_val, y_test, y_val = train_test_split(X_rem, y_rem, test_size=0.5, random_state=42)
                st.session_state.X_train = X_train
                st.session_state.X_test = X_test
                st.session_state.X_val = X_val
                st.session_state.y_train = y_train
                st.session_state.y_test = y_test
                st.session_state.y_val = y_val
                st.success("Dataset split successfully. You can now proceed to Building the model.")
|
330 |
+
|
331 |
+
# Building the model
# Shown once the dataset is split. Lets the user pick a target, a problem type
# and a model; on "Apply" it fits the estimator on the training split, writes
# it to model.pkl (read back by the evaluation step) and offers it for download.
if st.session_state.split_done:
    import importlib
    import pickle

    new_line()
    st.subheader("π§ Building the model", anchor=False)
    new_line()

    # Display name -> (module to import, estimator class, download-button key).
    # Modules are imported lazily so only the selected model's library has to
    # be importable.
    classification_models = {
        "Logistic Regression": ("sklearn.linear_model", "LogisticRegression", "class_log_reg"),
        "K Nearest Neighbors": ("sklearn.neighbors", "KNeighborsClassifier", "class_knn"),
        "Support Vector Machine": ("sklearn.svm", "SVC", "class_svm"),
        "Decision Tree": ("sklearn.tree", "DecisionTreeClassifier", "class_dt"),
        "Random Forest": ("sklearn.ensemble", "RandomForestClassifier", "class_rf"),
        "XGBoost": ("xgboost", "XGBClassifier", "class_xgb"),
        "LightGBM": ("lightgbm", "LGBMClassifier", "class_lgbm"),
        "CatBoost": ("catboost", "CatBoostClassifier", "class_cb"),
    }
    regression_models = {
        "Linear Regression": ("sklearn.linear_model", "LinearRegression", "reg_lin_reg"),
        "K Nearest Neighbors": ("sklearn.neighbors", "KNeighborsRegressor", "reg_knn"),
        "Support Vector Machine": ("sklearn.svm", "SVR", "reg_svm"),
        "Decision Tree": ("sklearn.tree", "DecisionTreeRegressor", "reg_dt"),
        "Random Forest": ("sklearn.ensemble", "RandomForestRegressor", "reg_rf"),
        "XGBoost": ("xgboost", "XGBRegressor", "reg_xgb"),
        "LightGBM": ("lightgbm", "LGBMRegressor", "reg_lgbm"),
        "CatBoost": ("catboost", "CatBoostRegressor", "reg_cb"),
    }
    model_registry = {"Classification": classification_models, "Regression": regression_models}

    col1, col2, col3 = st.columns(3)
    target = col1.selectbox("Select the target variable", df.columns.tolist(), key="target_model")
    problem_type = col2.selectbox("Select the problem type", ["Classification", "Regression"])
    model = col3.selectbox("Select the model", list(model_registry[problem_type]))

    new_line()
    if target and problem_type and model:
        col1, col2, col3 = st.columns([1, 0.8, 1])
        if col2.button("Apply", key="build_model", use_container_width=True):
            st.session_state.build_model_done = True
            # The evaluation step skips predict_proba while this flag is set but
            # still uses the probabilities afterwards, so a flag left over from
            # an earlier SVM build broke every later model. Reset it on each
            # build; the SVC below is fitted with probability estimates instead.
            st.session_state.no_svm = False

            module_name, class_name, download_key = model_registry[problem_type][model]
            estimator_cls = getattr(importlib.import_module(module_name), class_name)
            if class_name == "SVC":
                # probability=True makes predict_proba available on SVC.
                estimator = estimator_cls(probability=True)
            else:
                estimator = estimator_cls()
            estimator.fit(st.session_state.X_train, st.session_state.y_train)

            # Serialise once: the same bytes go to disk and to the download button.
            model_bytes = pickle.dumps(estimator)
            with open("model.pkl", "wb") as model_file:
                model_file.write(model_bytes)
            st.success("Model built successfully. You can now proceed to Evaluation.")

            col2.download_button("Download Model", model_bytes, "model.pkl", key=download_key, use_container_width=True)
|
586 |
+
|
587 |
+
# # Evaluation
|
588 |
+
if st.session_state.build_model_done:
|
589 |
+
new_line()
|
590 |
+
st.subheader("Evaluation", anchor=False)
|
591 |
+
new_line()
|
592 |
+
# with st.expander("Show Evaluation Metrics"):
|
593 |
+
if st.session_state.split_type == "Train and Test":
|
594 |
+
|
595 |
+
if problem_type == "Classification":
|
596 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
|
597 |
+
import pickle
|
598 |
+
|
599 |
+
model = pickle.load(open('model.pkl','rb'))
|
600 |
+
y_pred = model.predict(st.session_state.X_test)
|
601 |
+
if not st.session_state.no_svm:
|
602 |
+
y_prob = model.predict_proba(st.session_state.X_test)[:,1]
|
603 |
+
|
604 |
+
# Dataframe to store the metrics values for each set
|
605 |
+
metrics_df = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "F1", "ROC AUC"], index=["Train", "Test"])
|
606 |
+
metrics_df.loc["Train", "Accuracy"] = accuracy_score(st.session_state.y_train, model.predict(st.session_state.X_train))
|
607 |
+
metrics_df.loc["Train", "Precision"] = precision_score(st.session_state.y_train, model.predict(st.session_state.X_train))
|
608 |
+
metrics_df.loc["Train", "Recall"] = recall_score(st.session_state.y_train, model.predict(st.session_state.X_train))
|
609 |
+
metrics_df.loc["Train", "F1"] = f1_score(st.session_state.y_train, model.predict(st.session_state.X_train))
|
610 |
+
if not st.session_state.no_svm:
|
611 |
+
metrics_df.loc["Train", "ROC AUC"] = roc_auc_score(st.session_state.y_train, model.predict_proba(st.session_state.X_train)[:,1])
|
612 |
+
metrics_df.loc["Test", "Accuracy"] = accuracy_score(st.session_state.y_test, y_pred)
|
613 |
+
metrics_df.loc["Test", "Precision"] = precision_score(st.session_state.y_test, y_pred)
|
614 |
+
metrics_df.loc["Test", "Recall"] = recall_score(st.session_state.y_test, y_pred)
|
615 |
+
metrics_df.loc["Test", "F1"] = f1_score(st.session_state.y_test, y_pred)
|
616 |
+
# y_prob is only assigned when st.session_state.no_svm is False, so every
# use of it must sit behind the same guard (mirrors the Train/Validation/Test branch).
if not st.session_state.no_svm:
    metrics_df.loc["Test", "ROC AUC"] = roc_auc_score(st.session_state.y_test, y_prob)


new_line()

# Plot the other metrics using plotly
st.markdown("#### Metrics Plot")
import plotly.graph_objects as go
fig = go.Figure(data=[
    go.Bar(name='Train', x=metrics_df.columns.tolist(), y=metrics_df.loc["Train", :].values.tolist()),
    go.Bar(name='Test', x=metrics_df.columns.tolist(), y=metrics_df.loc["Test", :].values.tolist())
])
st.plotly_chart(fig)


# Plot the ROC Curve using px; skipped when no probabilities were computed
if not st.session_state.no_svm:
    import plotly.express as px
    from sklearn.metrics import roc_curve

    fpr, tpr, thresholds = roc_curve(st.session_state.y_test, y_prob)
    fig = px.area(
        x=fpr, y=tpr,
        title=f'ROC Curve (AUC={metrics_df.loc["Test", "ROC AUC"]:.4f})',
        labels=dict(x='False Positive Rate', y='True Positive Rate'),
        width=400, height=500
    )
    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference line
    fig.add_shape(
        type='line', line=dict(dash='dash'),
        x0=0, x1=1, y0=0, y1=1
    )

    fig.update_yaxes(scaleanchor="x", scaleratio=1)
    fig.update_xaxes(constrain='domain')
    st.plotly_chart(fig)
|
650 |
+
|
651 |
+
# Display the metrics values
|
652 |
+
new_line()
|
653 |
+
st.markdown("##### Metrics Values")
|
654 |
+
st.write(metrics_df)
|
655 |
+
|
656 |
+
# Plot confusion matrix as plot with plot_confusion_matrix
|
657 |
+
# from sklearn.metrics import plot_confusion_matrix
|
658 |
+
import matplotlib.pyplot as plt
|
659 |
+
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
|
660 |
+
st.markdown("#### Confusion Matrix")
|
661 |
+
new_line()
|
662 |
+
|
663 |
+
model = pickle.load(open('model.pkl','rb'))
|
664 |
+
y_pred = model.predict(st.session_state.X_test)
|
665 |
+
|
666 |
+
# cm = confusion_matrix(y_test, y_pred_test)
|
667 |
+
fig, ax = plt.subplots(figsize=(6,6))
|
668 |
+
ConfusionMatrixDisplay.from_predictions(st.session_state.y_test, y_pred, ax=ax)
|
669 |
+
st.pyplot(fig)
|
670 |
+
|
671 |
+
|
672 |
+
|
673 |
+
elif problem_type == "Regression":
|
674 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
|
675 |
+
import pickle
|
676 |
+
|
677 |
+
model = pickle.load(open('model.pkl','rb'))
|
678 |
+
y_pred = model.predict(st.session_state.X_test)
|
679 |
+
|
680 |
+
# Dataframe to store the metrics values for each set with RMSE
|
681 |
+
metrics_df = pd.DataFrame(columns=["Mean Squared Error", "Mean Absolute Error", "R2 Score"], index=["Train", "Test"])
|
682 |
+
metrics_df.loc["Train", "Mean Squared Error"] = mean_squared_error(st.session_state.y_train, model.predict(st.session_state.X_train))
|
683 |
+
metrics_df.loc["Train", "Mean Absolute Error"] = mean_absolute_error(st.session_state.y_train, model.predict(st.session_state.X_train))
|
684 |
+
metrics_df.loc["Train", "R2 Score"] = r2_score(st.session_state.y_train, model.predict(st.session_state.X_train))
|
685 |
+
metrics_df.loc['Train', 'RMSE'] = np.sqrt(metrics_df.loc['Train', 'Mean Squared Error'])
|
686 |
+
metrics_df.loc["Test", "Mean Squared Error"] = mean_squared_error(st.session_state.y_test, y_pred)
|
687 |
+
metrics_df.loc["Test", "Mean Absolute Error"] = mean_absolute_error(st.session_state.y_test, y_pred)
|
688 |
+
metrics_df.loc["Test", "R2 Score"] = r2_score(st.session_state.y_test, y_pred)
|
689 |
+
metrics_df.loc['Test', 'RMSE'] = np.sqrt(metrics_df.loc['Test', 'Mean Squared Error'])
|
690 |
+
|
691 |
+
new_line()
|
692 |
+
|
693 |
+
# Plot the other metrics using plotly
|
694 |
+
st.markdown("#### Metrics Plot")
|
695 |
+
import plotly.graph_objects as go
|
696 |
+
fig = go.Figure(data=[
|
697 |
+
go.Bar(name='Train', x=metrics_df.columns.tolist(), y=metrics_df.loc["Train", :].values.tolist()),
|
698 |
+
go.Bar(name='Test', x=metrics_df.columns.tolist(), y=metrics_df.loc["Test", :].values.tolist())
|
699 |
+
])
|
700 |
+
st.plotly_chart(fig)
|
701 |
+
|
702 |
+
# Display the metrics values
|
703 |
+
new_line()
|
704 |
+
st.markdown("##### Metrics Values")
|
705 |
+
st.write(metrics_df)
|
706 |
+
|
707 |
+
|
708 |
+
elif st.session_state.split_type == "Train, Validation and Test":
|
709 |
+
|
710 |
+
if problem_type == "Classification":
|
711 |
+
|
712 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
|
713 |
+
import pickle
|
714 |
+
|
715 |
+
model = pickle.load(open('model.pkl','rb'))
|
716 |
+
y_pred = model.predict(st.session_state.X_test)
|
717 |
+
if not st.session_state.no_svm:
|
718 |
+
y_prob = model.predict_proba(st.session_state.X_test)[:,1]
|
719 |
+
|
720 |
+
# Dataframe to store the metrics values for each set
|
721 |
+
metrics_df = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "F1", "ROC AUC"], index=["Train", "Validation", "Test"])
|
722 |
+
metrics_df.loc["Train", "Accuracy"] = accuracy_score(st.session_state.y_train, model.predict(st.session_state.X_train))
|
723 |
+
metrics_df.loc["Train", "Precision"] = precision_score(st.session_state.y_train, model.predict(st.session_state.X_train))
|
724 |
+
metrics_df.loc["Train", "Recall"] = recall_score(st.session_state.y_train, model.predict(st.session_state.X_train))
|
725 |
+
metrics_df.loc["Train", "F1"] = f1_score(st.session_state.y_train, model.predict(st.session_state.X_train))
|
726 |
+
if not st.session_state.no_svm:
|
727 |
+
metrics_df.loc["Train", "ROC AUC"] = roc_auc_score(st.session_state.y_train, model.predict_proba(st.session_state.X_train)[:,1])
|
728 |
+
metrics_df.loc["Validation", "Accuracy"] = accuracy_score(st.session_state.y_val, model.predict(st.session_state.X_val))
|
729 |
+
metrics_df.loc["Validation", "Precision"] = precision_score(st.session_state.y_val, model.predict(st.session_state.X_val))
|
730 |
+
metrics_df.loc["Validation", "Recall"] = recall_score(st.session_state.y_val, model.predict(st.session_state.X_val))
|
731 |
+
metrics_df.loc["Validation", "F1"] = f1_score(st.session_state.y_val, model.predict(st.session_state.X_val))
|
732 |
+
if not st.session_state.no_svm:
|
733 |
+
metrics_df.loc["Validation", "ROC AUC"] = roc_auc_score(st.session_state.y_val, model.predict_proba(st.session_state.X_val)[:,1])
|
734 |
+
metrics_df.loc["Test", "Accuracy"] = accuracy_score(st.session_state.y_test, y_pred)
|
735 |
+
metrics_df.loc["Test", "Precision"] = precision_score(st.session_state.y_test, y_pred)
|
736 |
+
metrics_df.loc["Test", "Recall"] = recall_score(st.session_state.y_test, y_pred)
|
737 |
+
metrics_df.loc["Test", "F1"] = f1_score(st.session_state.y_test, y_pred)
|
738 |
+
if not st.session_state.no_svm:
|
739 |
+
metrics_df.loc["Test", "ROC AUC"] = roc_auc_score(st.session_state.y_test, y_prob)
|
740 |
+
|
741 |
+
|
742 |
+
new_line()
|
743 |
+
|
744 |
+
# Plot the other metrics using plotly
|
745 |
+
st.markdown("#### Metrics Plot")
|
746 |
+
import plotly.graph_objects as go
|
747 |
+
fig = go.Figure(data=[
|
748 |
+
go.Bar(name='Train', x=metrics_df.columns.tolist(), y=metrics_df.loc["Train", :].values.tolist()),
|
749 |
+
go.Bar(name='Validation', x=metrics_df.columns.tolist(), y=metrics_df.loc["Validation", :].values.tolist()),
|
750 |
+
go.Bar(name='Test', x=metrics_df.columns.tolist(), y=metrics_df.loc["Test", :].values.tolist())
|
751 |
+
])
|
752 |
+
st.plotly_chart(fig)
|
753 |
+
|
754 |
+
|
755 |
+
# Plot the ROC Curve using px
|
756 |
+
if not st.session_state.no_svm:
|
757 |
+
import plotly.express as px
|
758 |
+
from sklearn.metrics import roc_curve
|
759 |
+
|
760 |
+
fpr, tpr, thresholds = roc_curve(st.session_state.y_test, y_prob)
|
761 |
+
fig = px.area(
|
762 |
+
x=fpr, y=tpr,
|
763 |
+
title=f'ROC Curve (AUC={metrics_df.loc["Test", "ROC AUC"]:.4f})',
|
764 |
+
labels=dict(x='False Positive Rate', y='True Positive Rate'),
|
765 |
+
width=400, height=500
|
766 |
+
)
|
767 |
+
fig.add_shape(
|
768 |
+
type='line', line=dict(dash='dash'),
|
769 |
+
x0=0, x1=1, y0=0, y1=1
|
770 |
+
)
|
771 |
+
|
772 |
+
fig.update_yaxes(scaleanchor="x", scaleratio=1)
|
773 |
+
fig.update_xaxes(constrain='domain')
|
774 |
+
st.plotly_chart(fig)
|
775 |
+
|
776 |
+
# Display the metrics values
|
777 |
+
new_line()
|
778 |
+
st.markdown("##### Metrics Values")
|
779 |
+
st.write(metrics_df)
|
780 |
+
|
781 |
+
# Plot confusion matrix as plot with plot_confusion_matrix
|
782 |
+
# from sklearn.metrics import plot_confusion_matrix
|
783 |
+
import matplotlib.pyplot as plt
|
784 |
+
|
785 |
+
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
|
786 |
+
st.markdown("#### Confusion Matrix")
|
787 |
+
new_line()
|
788 |
+
|
789 |
+
model = pickle.load(open('model.pkl','rb'))
|
790 |
+
y_pred = model.predict(st.session_state.X_test)
|
791 |
+
|
792 |
+
# cm = confusion_matrix(y_test, y_pred_test)
|
793 |
+
fig, ax = plt.subplots(figsize=(6,6))
|
794 |
+
ConfusionMatrixDisplay.from_predictions(st.session_state.y_test, y_pred, ax=ax)
|
795 |
+
st.pyplot(fig)
|
796 |
+
|
797 |
+
|
798 |
+
|
799 |
+
|
800 |
+
|
801 |
+
elif problem_type == "Regression":
|
802 |
+
|
803 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
|
804 |
+
import pickle
|
805 |
+
|
806 |
+
model = pickle.load(open('model.pkl','rb'))
|
807 |
+
y_pred = model.predict(st.session_state.X_test)
|
808 |
+
|
809 |
+
# Dataframe to store the metrics values for each set with RMSE
|
810 |
+
metrics_df = pd.DataFrame(columns=["Mean Squared Error", "Mean Absolute Error", "R2 Score"], index=["Train", "Validation", "Test"])
|
811 |
+
metrics_df.loc["Train", "Mean Squared Error"] = mean_squared_error(st.session_state.y_train, model.predict(st.session_state.X_train))
|
812 |
+
metrics_df.loc["Train", "Mean Absolute Error"] = mean_absolute_error(st.session_state.y_train, model.predict(st.session_state.X_train))
|
813 |
+
metrics_df.loc["Train", "R2 Score"] = r2_score(st.session_state.y_train, model.predict(st.session_state.X_train))
|
814 |
+
metrics_df.loc['Train', 'RMSE'] = np.sqrt(metrics_df.loc['Train', 'Mean Squared Error'])
|
815 |
+
metrics_df.loc["Validation", "Mean Squared Error"] = mean_squared_error(st.session_state.y_val, model.predict(st.session_state.X_val))
|
816 |
+
metrics_df.loc["Validation", "Mean Absolute Error"] = mean_absolute_error(st.session_state.y_val, model.predict(st.session_state.X_val))
|
817 |
+
metrics_df.loc["Validation", "R2 Score"] = r2_score(st.session_state.y_val, model.predict(st.session_state.X_val))
|
818 |
+
metrics_df.loc['Validation', 'RMSE'] = np.sqrt(metrics_df.loc['Validation', 'Mean Squared Error'])
|
819 |
+
metrics_df.loc["Test", "Mean Squared Error"] = mean_squared_error(st.session_state.y_test, y_pred)
|
820 |
+
metrics_df.loc["Test", "Mean Absolute Error"] = mean_absolute_error(st.session_state.y_test, y_pred)
|
821 |
+
metrics_df.loc["Test", "R2 Score"] = r2_score(st.session_state.y_test, y_pred)
|
822 |
+
metrics_df.loc['Test', 'RMSE'] = np.sqrt(metrics_df.loc['Test', 'Mean Squared Error'])
|
823 |
+
|
824 |
+
new_line()
|
825 |
+
|
826 |
+
# Plot the other metrics using plotly
|
827 |
+
st.markdown("#### Metrics Plot")
|
828 |
+
import plotly.graph_objects as go
|
829 |
+
fig = go.Figure(data=[
|
830 |
+
go.Bar(name='Train', x=metrics_df.columns.tolist(), y=metrics_df.loc["Train", :].values.tolist()),
|
831 |
+
go.Bar(name='Validation', x=metrics_df.columns.tolist(), y=metrics_df.loc["Validation", :].values.tolist()),
|
832 |
+
go.Bar(name='Test', x=metrics_df.columns.tolist(), y=metrics_df.loc["Test", :].values.tolist())
|
833 |
+
])
|
834 |
+
st.plotly_chart(fig)
|
835 |
+
|
836 |
+
# Display the metrics values
|
837 |
+
new_line()
|
838 |
+
st.markdown("##### Metrics Values")
|
839 |
+
st.write(metrics_df)
|
pages/3_π_StudyML.py
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pages/4_π_About.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import streamlit as st
|
3 |
+
import requests
|
4 |
+
import PIL.Image as Image
|
5 |
+
|
6 |
+
|
7 |
+
def new_line():
    """Render a single HTML line break on the page as vertical spacing."""
    line_break = "<br>"
    st.markdown(line_break, unsafe_allow_html=True)
|
9 |
+
|
10 |
+
|
11 |
+
# Define a function to load the Lottie animation
|
12 |
+
def load_lottieurl(url: str, timeout: float = 10.0):
    """Fetch a Lottie animation from *url* and return its decoded JSON.

    Args:
        url: Address of the Lottie JSON document.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON payload, or ``None`` when the request fails, the
        response status is not 200, or the body is not valid JSON.
    """
    try:
        # requests applies no timeout by default; without one a stalled
        # server would block the page script indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # JSON decoding errors are ValueError subclasses.
        return None
|
17 |
+
|
18 |
+
# Page configuration: load the icon image, then apply layout, title and icon
page_icon = Image.open("./assets/icon.png")
st.set_page_config(
    page_title="Click ML",
    page_icon=page_icon,
    layout="centered",
)
|
21 |
+
|
22 |
+
|
23 |
+
# Create the About page
|
24 |
+
def main():
    """Render the About page.

    Writes, in order: the page title, the introduction and promo video, a
    description of each tab, the "Why Choose ClickML?" list, the tutorial
    video, and the contributors, source code, roadmap and contact sections.
    """
    # Title Page
    st.markdown("<h1 align='center'> πAbout</h1>", unsafe_allow_html=True)
    new_line()

    # What is ClickML?
    st.markdown("Welcome to ClickML, an intuitive and powerful machine learning application designed to simplify the process of building and evaluating machine learning models. Whether you're a beginner or an experienced data scientist, ClickML provides a user-friendly interface to streamline your machine learning workflows.", unsafe_allow_html=True)
    st.markdown("It is a no-code easy-to-use platform which allows you to build machine learning models without writing a single line of code. \n ")

    # Show Video Promo
    video_path = "./assets/Promo.mp4"
    # Read through a context manager so the file handle is closed promptly
    with open(video_path, "rb") as video_file:
        video_bytes = video_file.read()
    st.video(video_bytes)

    # what this app does with the main, quickml, and study_time pages
    st.markdown("This app is divided into three main tabs: **π ClickML**, **π QuickML**, and **π StudyML**.", unsafe_allow_html=True)
    st.write("\n")

    # ClickML
    st.markdown("### π ClickML")
    st.markdown("- **ClickML:** This section is the main page of the **ClickML** web app. It provides the customizability to build Machine Learning models by selecting and applying the Data Preparation techniques that fit your data. Also, you can try different Machine Learning models and tune the hyperparameters to get the best model.", unsafe_allow_html=True)
    st.write("\n")

    # QuickML
    st.markdown("### π QuickML")
    st.markdown("- **QuickML:** QuickML is a tab that allows you to build a model quickly with just a few clicks. This tab is designed for people who are new to Machine Learning and want to build a model quickly without having to go through the entire process of Exploratory Data Analysis, Data Cleaning, Feature Engineering, etc. It is just a quick way to build a model for testing purposes.", unsafe_allow_html=True)
    st.write("\n")

    # StudyML
    st.markdown("### π StudyML")
    st.markdown("- **Study Time:** The StudyML tab is designed to help you to understand the key concepts of building machine learning models. This tab has 7 sections, each section talks about a specific concept in building machine learning models. With each section you will have the ability to apply the concepts of that section on multiple datasets. The code, the explanation, and everything you need to understand is in this tab.", unsafe_allow_html=True)
    new_line()

    # Why ClickML?
    st.header("β¨ Why Choose ClickML?")
    st.markdown("""
- **User-Friendly Interface**: ClickML offers an intuitive and easy-to-use interface, making machine learning accessible to users of all skill levels.
- **Efficiency and Speed**: With ClickML, you can quickly build, train, and evaluate machine learning models, reducing the time and effort required.
- **Comprehensive Learning Resources**: The StudyML tab provides detailed explanations, code examples, and visualizations to enhance your understanding of machine learning concepts.
- **Flexible and Customizable**: ClickML supports a wide range of algorithms and allows you to fine-tune model parameters to meet your specific requirements.

""", unsafe_allow_html=True)
    new_line()

    # How to use ClickML?
    st.header("π How to Use ClickML?")
    st.markdown("Below is a video that explains how to use ClickML by building a machine learning model on the Titanic dataset and by using all the features of ClickML.", unsafe_allow_html=True)
    st.video("./assets/Tutorial.mp4")

    # Contributors
    st.header(" π€ Contributors")
    st.markdown("This application was developed and maintained by **Basel Mathar**.", unsafe_allow_html=True)
    st.markdown("Basel is a Data Scientist and a Machine Learning Engineer. He is passionate about building Machine Learning models and creating web apps to help others build Machine Learning models.", unsafe_allow_html=True)
    new_line()

    # Source Code
    st.header(" π Source Code")
    st.markdown("The source code for this app is available on [**GitHub**](https://github.com/baselhusam/clickml). Feel free to contribute, provide feedback, or customize the application to suit your needs.", unsafe_allow_html=True)
    st.markdown("You can open the terminal and run the following commands to download the source code and run the app locally:", unsafe_allow_html=True)
    st.code("""git clone https://github.com/baselhusam/ClickML.git
pip install -r requirements.txt
streamlit run 1_ClickML.py""",
            language="bash")
    new_line()

    # Roadmap
    st.header(" πΊοΈ Roadmap")
    st.markdown("""This is a roadmap for the ClickML project. It will show the current status of the project and the future work that needs to be done.
Visit the [**ClickML Roadmap**](https://clickml-roadmap.streamlit.app/) for more information.""", unsafe_allow_html = True)
    new_line()

    # Contact Us
    # The markdown body stays at column 0 so no line is parsed as an indented code block.
    st.header(" π¬ Contact Us")
    st.markdown("""If you have any questions or suggestions, please feel free to contact us at **[email protected]**. We're here to help!

**Connect with us on social media:**

<a href="https://www.linkedin.com/company/clickml/?viewAsMember=true" target="_blank">
<img src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQe0adDoUGWVD3jGzfT8grK5Uhw0dLXSk3OWJwZaXI-t95suRZQ-wPF7-Az6KurXDVktV4&usqp=CAU" alt="LinkedIn" width="80" height="80" style="border-radius: 25%;">
</a> σ ͺ σ ͺ σ ͺ σ ͺ σ ͺ
<a href="https://www.instagram.com/baselhusam/" target="_blank">
<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/Instagram_logo_2016.svg/2048px-Instagram_logo_2016.svg.png" alt="Instagram" width="80" height="80" style="border-radius: 25%;">
</a> σ ͺ σ ͺ σ ͺ σ ͺ σ ͺ
<a href="https://www.facebook.com/profile.php?id=100088667931989" target="_blank">
<img src="https://seeklogo.com/images/F/facebook-logo-C64946D6D2-seeklogo.com.png" alt="Facebook" width="80" height="80" style="border-radius: 25%;">
</a>

<br>
<br>

We look forward to hearing from you and supporting you on your machine learning journey!


""", unsafe_allow_html=True)
|
122 |
+
|
123 |
+
|
124 |
+
# Entry point: build the About page when this file is executed as a script.
if __name__ == "__main__":
    main()
|