# Importing Libraries
import time

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from PIL import Image
from wordcloud import WordCloud

# Config
page_icon = Image.open("./assets/logo.png")
st.set_page_config(layout="centered", page_title="Click Analyst", page_icon=page_icon)

# Initial State
def initial_state():
    # Every session key the app relies on, with its default value. Looping over a
    # dict keeps the initialization in one place instead of repeating the same
    # `if key not in st.session_state` check for every key.
    defaults = {
        'df': None,
        'X_train': None, 'X_test': None, 'X_val': None,
        'y_train': None, 'y_test': None, 'y_val': None,
        'model': None,
        'trained_model': False,
        'trained_model_bool': False,
        'problem_type': None,
        'metrics_df': pd.DataFrame(),
        'is_train': False, 'is_test': False, 'is_val': False,
        'show_eval': False,
        'all_the_process': "",
        'all_the_process_predictions': False,
        'y_pred_train': None, 'y_pred_test': None, 'y_pred_val': None,
        'uploading_way': None,
        'lst_models': [],
        'lst_models_predictions': [],
        'models_with_eval': dict(),
        'reset_1': False,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value

initial_state()

# New Line
def new_line(n=1):
    for _ in range(n):
        st.write("\n")

# Load Data
@st.cache_data
def load_data(upd_file):
    # Read a CSV or Excel file
    if upd_file.name.endswith('.csv'):
        return pd.read_csv(upd_file)
    elif upd_file.name.endswith('.xlsx') or upd_file.name.endswith('.xls'):
        return pd.read_excel(upd_file)
    else:
        raise ValueError("Unsupported file format. Only CSV and Excel files are supported.")

# Progress Bar
def progress_bar():
    my_bar = st.progress(0)
    for percent_complete in range(100):
        time.sleep(0.0002)
        my_bar.progress(percent_complete + 1)

# Logo
col1, col2, col3 = st.columns([0.25, 1, 0.25])
col2.image("./assets/logo.png", use_column_width=True)
new_line(2)

# Description
st.markdown("""Welcome to Click Analyst! 🚀 Dive into the future of data with a platform
designed for everyone, with no coding or machine learning experience required. With just a
few clicks you can prepare your data, train cutting-edge models, and uncover valuable
insights. Whether you are a data enthusiast or a seasoned analyst, Click Analyst lets you
create, analyze, and explore with ease. Start building your own analytics and models today
and see what decisions your data can empower!""", unsafe_allow_html=True)
st.divider()
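# A quick illustration of why the loader above is decorated with @st.cache_data:
# Streamlit reruns the whole script on every widget interaction, and the decorator
# memoizes the parsed DataFrame so the file is only parsed once per upload. This is
# an illustrative sketch, not part of the app flow; "demo.csv" is a hypothetical file.
def _cached_loading_sketch():
    @st.cache_data
    def read_csv_once(path):
        # First call parses the file and stores the result; later calls with the
        # same argument are served from the cache without touching the disk.
        return pd.read_csv(path)

    first = read_csv_once("demo.csv")   # parses the file
    second = read_csv_once("demo.csv")  # returned from the cache
    return first, second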

# Dataframe selection
st.markdown("<h2 align='center'>Getting Started</h2>", unsafe_allow_html=True)
new_line(1)
st.write("The first step is to upload your data. You can upload your data in three ways: **Upload File**, **Select from Ours**, and **Write URL**. In all ways the data should be a CSV or Excel file and should not exceed 200 MB.")
new_line(1)

# Uploading Way
uploading_way = st.session_state.uploading_way
col1, col2, col3 = st.columns(3, gap='large')

# Upload
def upload_click():
    st.session_state.uploading_way = "upload"
col1.markdown("<h5 align='center'>Upload File</h5>", unsafe_allow_html=True)
col1.button("Upload File", key="upload_file", use_container_width=True, on_click=upload_click)

# URL
def url_click():
    st.session_state.uploading_way = "url"
col3.markdown("<h5 align='center'>Write URL</h5>", unsafe_allow_html=True)
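# For the "Write URL" path, pandas can read a CSV straight from a link as long as the
# URL points at the raw file. A minimal sketch (the URL below is a placeholder):
def _url_loading_sketch():
    url = "https://example.com/data.csv"  # placeholder; must point to a raw CSV
    return pd.read_csv(url)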
col3.button("Write URL", key="write_url", use_container_width=True, on_click=url_click)

# No Data
if st.session_state.df is None:

    # Upload
    if uploading_way == "upload":
        uploaded_file = st.file_uploader("Upload the Dataset", type=["csv", "xlsx", "xls"])
        if uploaded_file:
            try:
                df = load_data(uploaded_file)
                st.session_state.df = df
            except Exception as e:
                st.error(f"Error loading the file: {e}")

    # URL
    elif uploading_way == "url":
        url = st.text_input("Enter URL")
        if url:
            try:
                # load_data() expects an uploaded file object, so a URL is read directly
                df = pd.read_csv(url)
                st.session_state.df = df
            except Exception as e:
                st.error(f"Error loading the file from the URL: {e}")

# Sidebar
with st.sidebar:
    st.image("./assets/logo.png", use_column_width=True)

# Dataframe
if st.session_state.df is not None:

    # Re-initialize the variables from the state
    df = st.session_state.df
    X_train = st.session_state.X_train
    X_test = st.session_state.X_test
    y_train = st.session_state.y_train
    y_test = st.session_state.y_test
    X_val = st.session_state.X_val
    y_val = st.session_state.y_val
    trained_model = st.session_state.trained_model
    is_train = st.session_state.is_train
    is_test = st.session_state.is_test
    is_val = st.session_state.is_val
    model = st.session_state.model
    show_eval = st.session_state.show_eval
    y_pred_train = st.session_state.y_pred_train
    y_pred_test = st.session_state.y_pred_test
    y_pred_val = st.session_state.y_pred_val
    metrics_df = st.session_state.metrics_df

    st.divider()
    new_line()

    # EDA
    st.markdown("### 🕵️‍♂️ Exploratory Data Analysis", unsafe_allow_html=True)
    new_line()
    with st.expander("Show EDA"):
        new_line()

        # Head
        head = st.checkbox("Show First 5 Rows", value=False)
        new_line()
        if head:
            st.dataframe(df.head(), use_container_width=True)

        # Tail
        tail = st.checkbox("Show Last 5 Rows", value=False)
        new_line()
        if tail:
            st.dataframe(df.tail(), use_container_width=True)

        # Shape
        shape = st.checkbox("Show Shape", value=False)
        new_line()
        if shape:
            st.write(f"This DataFrame has **{df.shape[0]} rows** and **{df.shape[1]} columns**.")
            new_line()

        # Columns
        columns = st.checkbox("Show Columns", value=False)
        new_line()
        if columns:
            st.write(pd.DataFrame(df.columns, columns=['Columns']).T)
            new_line()

        if st.checkbox("Check Data Types", value=False):
            st.write(df.dtypes)
            new_line()
            new_line()

        if st.checkbox("Show Skewness and Kurtosis", value=False):
            # numeric_only avoids errors when the DataFrame has string columns
            skew_kurt = pd.DataFrame(data={
                'Skewness': df.skew(numeric_only=True),
                'Kurtosis': df.kurtosis(numeric_only=True)
            })
            st.write(skew_kurt)
            new_line()
            new_line()

        # Describe Numerical
        describe = st.checkbox("Show Description **(Numerical Features)**", value=False)
        new_line()
        if describe:
            st.dataframe(df.describe(), use_container_width=True)
            new_line()

        if st.checkbox("Unique Value Count", value=False):
            unique_counts = pd.DataFrame(df.nunique()).rename(columns={0: 'Unique Count'})
            st.write(unique_counts)
            new_line()
            new_line()

        # Describe Categorical
        describe_cat = st.checkbox("Show Description **(Categorical Features)**", value=False)
        new_line()
        if describe_cat:
            # np.object was removed from NumPy; plain `object` selects string columns
            if df.select_dtypes(include=object).columns.tolist():
                st.dataframe(df.describe(include=['object']), use_container_width=True)
                new_line()
            else:
                st.info("There are no categorical features.")
                new_line()

        # Correlation Matrix using a seaborn heatmap
        corr = st.checkbox("Show Correlation", value=False)
        new_line()
        if corr:
            # numeric_only restricts the correlation matrix to numeric columns
            if df.corr(numeric_only=True).columns.tolist():
                fig, ax = plt.subplots()
                sns.heatmap(df.corr(numeric_only=True), cmap='Blues', annot=True, ax=ax)
                st.pyplot(fig)
                new_line()
            else:
                st.info("There are no numerical features.")

        # Missing Values
        missing = st.checkbox("Show Missing Values", value=False)
        new_line()
        if missing:
            col1, col2 = st.columns([0.4, 1])
            with col1:
                st.markdown("
Number of Null Values", unsafe_allow_html=True) st.dataframe(df.isnull().sum().sort_values(ascending=False),height=350, use_container_width=True) with col2: st.markdown("
Plot for the Null Values ", unsafe_allow_html=True) null_values = df.isnull().sum() null_values = null_values[null_values > 0] null_values = null_values.sort_values(ascending=False) null_values = null_values.to_frame() null_values.columns = ['Count'] null_values.index.names = ['Feature'] null_values['Feature'] = null_values.index fig = px.bar(null_values, x='Feature', y='Count', color='Count', height=350) st.plotly_chart(fig, use_container_width=True) new_line() # Delete Columns delete = st.checkbox("Delete Columns", value=False) new_line() if delete: col_to_delete = st.multiselect("Select Columns to Delete", df.columns) new_line() col1, col2, col3 = st.columns([1,0.7,1]) if col2.button("Delete", use_container_width=True): st.session_state.all_the_process += f""" # Delete Columns df.drop(columns={col_to_delete}, inplace=True) \n """ progress_bar() df.drop(columns=col_to_delete, inplace=True) st.session_state.df = df st.success(f"The Columns **`{col_to_delete}`** are Deleted Successfully!") # Show DataFrame Button col1, col2, col3 = st.columns([0.15,1,0.15]) col2.divider() col1, col2, col3 = st.columns([1, 0.7, 1]) if col2.button("Show DataFrame", use_container_width=True): st.dataframe(df, use_container_width=True) #start point # Histograms for Numerical Features hist = st.checkbox("Show Histograms", value=False) new_line() if hist: numeric_cols = df.select_dtypes(include=np.number).columns.tolist() col_for_hist = st.selectbox("Select Column for Histogram", options=numeric_cols) num_bins = st.slider("Select Number of Bins", min_value=10, max_value=100, value=30) fig, ax = plt.subplots() df[col_for_hist].hist(bins=num_bins, ax=ax, color='skyblue') ax.set_title(f'Histogram of {col_for_hist}') st.pyplot(fig) new_line() # Box Plots for Numerical Features boxplot = st.checkbox("Show Box Plots", value=False) new_line() if boxplot: numeric_cols = df.select_dtypes(include=np.number).columns.tolist() col_for_box = st.selectbox("Select Column for Box Plot", options=numeric_cols) fig, ax = plt.subplots() df.boxplot(column=[col_for_box], ax=ax) ax.set_title(f'Box Plot of {col_for_box}') st.pyplot(fig) new_line() st.set_option('deprecation.showPyplotGlobalUse', False) # Scatter Plots for Numerical Features scatter = st.checkbox("Show Scatter Plots", value=False) new_line() if scatter: numeric_cols = df.select_dtypes(include=np.number).columns.tolist() x_col = st.selectbox("Select X-axis Column", options=numeric_cols, index=0) y_col = st.selectbox("Select Y-axis Column", options=numeric_cols, index=1 if len(numeric_cols) > 1 else 0) fig, ax = plt.subplots() df.plot(kind='scatter', x=x_col, y=y_col, ax=ax, color='red') ax.set_title(f'Scatter Plot between {x_col} and {y_col}') st.pyplot(fig) new_line() # Pair Plots for Numerical Features pairplot = st.checkbox("Show Pair Plots", value=False) new_line() if pairplot: sns.pairplot(df.select_dtypes(include=np.number)) st.pyplot() # Count Plots for Categorical Data countplot = st.checkbox("Show Count Plots", value=False) new_line() if countplot: categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist() col_for_count = st.selectbox("Select Column for Count Plot", options=categorical_cols) fig, ax = plt.subplots() sns.countplot(x=df[col_for_count], data=df, ax=ax) ax.set_title(f'Count Plot of {col_for_count}') st.pyplot(fig) new_line() # Pie Charts for Categorical Data pie_chart = st.checkbox("Show Pie Charts", value=False) new_line() if pie_chart: categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist() 
            col_for_pie = st.selectbox("Select Column for Pie Chart", options=categorical_cols)
            pie_data = df[col_for_pie].value_counts()
            fig, ax = plt.subplots()
            ax.pie(pie_data, labels=pie_data.index, autopct='%1.1f%%', startangle=90)
            ax.axis('equal')  # Equal aspect ratio ensures that the pie is drawn as a circle.
            ax.set_title(f'Pie Chart of {col_for_pie}')
            st.pyplot(fig)
            new_line()
            new_line()

        if st.checkbox("Identify Outliers", value=False):
            numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
            col_for_outliers = st.selectbox("Select Column to Check Outliers", options=numeric_cols)
            fig, ax = plt.subplots()
            sns.boxplot(x=df[col_for_outliers], ax=ax)
            ax.set_title(f'Outliers in {col_for_outliers}')
            st.pyplot(fig)
            new_line()
            new_line()

        if st.checkbox("Show Cross-tabulations", value=False):
            categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
            x_col = st.selectbox("Select X-axis Column for Cross-tab", options=categorical_cols, index=0)
            y_col = st.selectbox("Select Y-axis Column for Cross-tab", options=categorical_cols, index=1 if len(categorical_cols) > 1 else 0)
            cross_tab = pd.crosstab(df[x_col], df[y_col])
            st.write(cross_tab)
            new_line()
            new_line()

        if st.checkbox("Segmented Analysis", value=False):
            segments = st.selectbox("Select Segment", options=df.columns)
            segment_values = df[segments].dropna().unique()
            selected_segment = st.selectbox("Choose Segment Value", options=segment_values)
            segmented_data = df[df[segments] == selected_segment]
            st.write(segmented_data)
            new_line()
            new_line()

        if st.checkbox("Temporal Analysis", value=False):
            date_col_options = df.select_dtypes(include=[np.datetime64]).columns.tolist()
            value_col_options = df.select_dtypes(include=np.number).columns.tolist()
            if not date_col_options:
                st.error("No datetime columns found in the DataFrame.")
            elif not value_col_options:
                st.error("No numeric columns found in the DataFrame.")
            else:
                date_col = st.selectbox("Select Date Column", options=date_col_options)
                value_col = st.selectbox("Select Value Column", options=value_col_options)
                fig, ax = plt.subplots()
                df.set_index(date_col)[value_col].plot(ax=ax)
                ax.set_title(f'Trend Over Time - {value_col}')
                st.pyplot(fig)
                new_line()

        if st.checkbox("Show Word Cloud", value=False):
            # Get the list of text-like columns for the user to choose from
            # (np.object was removed from NumPy; plain `object` works the same way)
            text_col_options = df.select_dtypes(include=[object, 'string']).columns.tolist()
            if text_col_options:
                # Let the user select a text column
                text_col = st.selectbox("Select Text Column for Word Cloud", options=text_col_options)
                # Collect text data, dropping NA values and joining them into a single string
                text_data = ' '.join(df[text_col].dropna()).strip()
                if text_data:  # Check if there is any text data to use
                    try:
                        wordcloud = WordCloud(width=800, height=400).generate(text_data)
                        fig, ax = plt.subplots()
                        ax.imshow(wordcloud, interpolation='bilinear')
                        ax.axis('off')
                        st.pyplot(fig)
                    except ValueError as e:
                        st.error("Failed to generate word cloud: " + str(e))
                else:
                    st.error("No words available to create a word cloud. Please check the selected text data.")
            else:
                st.error("No suitable text columns found for creating a word cloud.")
            new_line()

        # Interactive Data Tables
        interactive_table = st.checkbox("Show Interactive Data Table", value=False)
        new_line()
        if interactive_table:
            st.dataframe(df)
            new_line()
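    # The box plots above flag outliers with Tukey's rule: a value is suspect when it
    # falls more than 1.5 IQRs outside the middle quartiles. A small sketch of the
    # math, for reference (illustrative only, not wired into the UI):
    def _iqr_outlier_bounds(series):
        q1, q3 = series.quantile(0.25), series.quantile(0.75)
        iqr = q3 - q1           # interquartile range
        lower = q1 - 1.5 * iqr  # values below this are flagged as outliers
        upper = q3 + 1.5 * iqr  # values above this are flagged as outliers
        return lower, upper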
    # Missing Values
    new_line()
    st.markdown("### ⚠️ Missing Values", unsafe_allow_html=True)
    new_line()
    with st.expander("Show Missing Values"):

        # Further Analysis
        new_line()
        missing = st.checkbox("Further Analysis", value=False, key='missing')
        new_line()
        if missing:
            col1, col2 = st.columns(2, gap='medium')
            with col1:
                # Number of Null Values
                st.markdown("<h6 align='center'>Number of Null Values</h6>", unsafe_allow_html=True)
                st.dataframe(df.isnull().sum().sort_values(ascending=False), height=300, use_container_width=True)

            with col2:
                # Percentage of Null Values
                st.markdown("<h6 align='center'>Percentage of Null Values</h6>", unsafe_allow_html=True)
                null_percentage = pd.DataFrame(round(df.isnull().sum() / df.shape[0] * 100, 2))
                null_percentage.columns = ['Percentage']
                # Sort numerically first; formatting to strings beforehand would sort lexicographically
                null_percentage = null_percentage.sort_values(by='Percentage', ascending=False)
                null_percentage['Percentage'] = null_percentage['Percentage'].map('{:.2f} %'.format)
                st.dataframe(null_percentage, height=300, use_container_width=True)
            # Heatmap
            col1, col2, col3 = st.columns([0.1, 1, 0.1])
            with col2:
                new_line()
                st.markdown("<h6 align='center'>Plot for the Null Values</h6>", unsafe_allow_html=True)
                null_values = df.isnull().sum()
                null_values = null_values[null_values > 0]
                null_values = null_values.sort_values(ascending=False)
                null_values = null_values.to_frame()
                null_values.columns = ['Count']
                null_values.index.names = ['Feature']
                null_values['Feature'] = null_values.index
                fig = px.bar(null_values, x='Feature', y='Count', color='Count', height=350)
                st.plotly_chart(fig, use_container_width=True)

        # INPUT
        col1, col2 = st.columns(2)
        with col1:
            missing_df_cols = df.columns[df.isnull().any()].tolist()
            if missing_df_cols:
                add_opt = ["All Numerical Features (ClickML Feature)", "All Categorical Feature (ClickML Feature)"]
            else:
                add_opt = []
            fill_feat = st.multiselect("Select Features", missing_df_cols + add_opt, help="Select Features to fill missing values")
        with col2:
            strategy = st.selectbox("Select Missing Values Strategy", ["Select", "Drop Rows", "Drop Columns", "Fill with Mean", "Fill with Median", "Fill with Mode (Most Frequent)", "Fill with ffill, bfill"], help="Select Missing Values Strategy")

        if fill_feat and strategy != "Select":
            new_line()
            col1, col2, col3 = st.columns([1, 0.5, 1])
            if col2.button("Apply", use_container_width=True, key="missing_apply", help="Apply Missing Values Strategy"):
                progress_bar()

                # All Numerical Features
                if "All Numerical Features (ClickML Feature)" in fill_feat:
                    fill_feat.remove("All Numerical Features (ClickML Feature)")
                    fill_feat += df.select_dtypes(include=np.number).columns.tolist()

                # All Categorical Features
                if "All Categorical Feature (ClickML Feature)" in fill_feat:
                    fill_feat.remove("All Categorical Feature (ClickML Feature)")
                    fill_feat += df.select_dtypes(include=object).columns.tolist()

                # Drop Rows
                if strategy == "Drop Rows":
                    # dropna(subset=...) actually removes the rows; assigning a dropped
                    # subset back to df would realign on the index and change nothing
                    st.session_state.all_the_process += f"""
# Drop Rows
df = df.dropna(subset={fill_feat})
"""
                    df = df.dropna(subset=fill_feat)
                    st.session_state['df'] = df
                    st.success(f"The rows with missing values in the features **`{fill_feat}`** have been dropped from the DataFrame.")

                # Drop Columns
                elif strategy == "Drop Columns":
                    st.session_state.all_the_process += f"""
# Drop Columns
df = df.drop(columns={fill_feat})
"""
                    df = df.drop(columns=fill_feat)
                    st.session_state['df'] = df
                    st.success(f"The Columns **`{fill_feat}`** have been dropped from the DataFrame.")

                # Fill with Mean
                elif strategy == "Fill with Mean":
                    st.session_state.all_the_process += f"""
# Fill with Mean
from sklearn.impute import SimpleImputer
num_imputer = SimpleImputer(strategy='mean')
df[{fill_feat}] = num_imputer.fit_transform(df[{fill_feat}])
"""
                    from sklearn.impute import SimpleImputer
                    num_imputer = SimpleImputer(strategy='mean')
                    df[fill_feat] = num_imputer.fit_transform(df[fill_feat])

                    null_cat = df[missing_df_cols].select_dtypes(include=object).columns.tolist()
                    if null_cat:
                        st.session_state.all_the_process += f"""
# Fill with Mode
from sklearn.impute import SimpleImputer
cat_imputer = SimpleImputer(strategy='most_frequent')
df[{null_cat}] = cat_imputer.fit_transform(df[{null_cat}])
"""
                        cat_imputer = SimpleImputer(strategy='most_frequent')
                        df[null_cat] = cat_imputer.fit_transform(df[null_cat])

                    st.session_state['df'] = df
                    if null_cat:
                        st.success(f"The Columns **`{fill_feat}`** have been filled with the mean, and the categorical columns **`{null_cat}`** have been filled with the mode.")
                    else:
                        st.success(f"The Columns **`{fill_feat}`** have been filled with the mean.")

                # Fill with Median
                elif strategy == "Fill with Median":
                    st.session_state.all_the_process += f"""
# Fill with Median
from sklearn.impute import SimpleImputer
num_imputer = SimpleImputer(strategy='median')
df[{fill_feat}] = pd.DataFrame(num_imputer.fit_transform(df[{fill_feat}]), columns=df[{fill_feat}].columns)
"""
                    from sklearn.impute import SimpleImputer
                    num_imputer = SimpleImputer(strategy='median')
                    df[fill_feat] = pd.DataFrame(num_imputer.fit_transform(df[fill_feat]), columns=df[fill_feat].columns)

                    null_cat = df[missing_df_cols].select_dtypes(include=object).columns.tolist()
                    if null_cat:
                        st.session_state.all_the_process += f"""
# Fill with Mode
from sklearn.impute import SimpleImputer
cat_imputer = SimpleImputer(strategy='most_frequent')
df[{null_cat}] = cat_imputer.fit_transform(df[{null_cat}])
"""
                        cat_imputer = SimpleImputer(strategy='most_frequent')
                        df[null_cat] = cat_imputer.fit_transform(df[null_cat])

                    st.session_state['df'] = df
                    if null_cat:
                        st.success(f"The Columns **`{fill_feat}`** have been filled with the median, and the categorical columns **`{null_cat}`** have been filled with the mode.")
                    else:
                        st.success(f"The Columns **`{fill_feat}`** have been filled with the median.")

                # Fill with Mode
                elif strategy == "Fill with Mode (Most Frequent)":
                    st.session_state.all_the_process += f"""
# Fill with Mode
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='most_frequent')
df[{fill_feat}] = imputer.fit_transform(df[{fill_feat}])
"""
                    from sklearn.impute import SimpleImputer
                    imputer = SimpleImputer(strategy='most_frequent')
                    df[fill_feat] = imputer.fit_transform(df[fill_feat])
                    st.session_state['df'] = df
                    st.success(f"The Columns **`{fill_feat}`** have been filled with the mode.")

                # Fill with ffill, bfill
                elif strategy == "Fill with ffill, bfill":
                    # .ffill()/.bfill() replace the deprecated fillna(method=...) calls
                    st.session_state.all_the_process += f"""
# Fill with ffill, bfill
df[{fill_feat}] = df[{fill_feat}].ffill().bfill()
"""
                    df[fill_feat] = df[fill_feat].ffill().bfill()
                    st.session_state['df'] = df
                    st.success(f"The features **`{fill_feat}`** have been filled with ffill and bfill.")

        # Show DataFrame Button
        col1, col2, col3 = st.columns([0.15, 1, 0.15])
        col2.divider()
        col1, col2, col3 = st.columns([0.9, 0.6, 1])
        with col2:
            show_df = st.button("Show DataFrame", key="missing_show_df")
        if show_df:
            st.dataframe(df, use_container_width=True)
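    # Note on the imputers above: SimpleImputer learns its statistic (mean, median, or
    # mode) from whatever it is given with fit_transform. Here it sees the full
    # DataFrame, which is convenient but leaks information from future test rows into
    # the fill values. A stricter sketch, assuming the split was already done
    # (illustrative only, not part of the app flow):
    def _leak_free_imputation_sketch(X_train, X_test, num_cols):
        from sklearn.impute import SimpleImputer
        imputer = SimpleImputer(strategy='mean')
        X_train[num_cols] = imputer.fit_transform(X_train[num_cols])  # learn the means on train only
        X_test[num_cols] = imputer.transform(X_test[num_cols])        # reuse the train means
        return X_train, X_test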
    # Encoding
    new_line()
    st.markdown("### 🔠 Handling Categorical Data", unsafe_allow_html=True)
    new_line()
    with st.expander("Show Encoding"):
        new_line()

        # Explain
        exp_enc = st.checkbox("Explain Encoding", value=False, key='exp_enc')
        if exp_enc:
            col1, col2 = st.columns([0.8, 1])
            with col1:
                st.markdown("<h6 align='center'>Ordinal Encoding</h6>
", unsafe_allow_html=True) cola, colb = st.columns(2) with cola: st.write("Before Encoding") st.dataframe(pd.DataFrame(np.array(['a','b','c','b','a']) ),width=120, height=200) with colb: st.write("After Encoding") st.dataframe(pd.DataFrame(np.array([0,1,2,1,0])),width=120, height=200) with col2: st.markdown("
<h6 align='center'>One Hot Encoding</h6>
", unsafe_allow_html=True) cola, colb = st.columns([0.7,1]) with cola: st.write("Before Encoding") st.dataframe(pd.DataFrame(np.array(['a','b','c', 'b','a']) ),width=150, height=200) with colb: st.write("After Encoding") st.dataframe(pd.DataFrame(np.array([[1,0,0],[0,1,0],[0,0,1],[0,1,0],[1,0,0]])),width=200, height=200) col1, col2, col3 = st.columns([0.5,1,0.5]) with col2: new_line() st.markdown("
<h6 align='center'>Count Frequency Encoding</h6>", unsafe_allow_html=True)
                cola, colb = st.columns([0.8, 1])
                with cola:
                    st.write("Before Encoding")
                    st.dataframe(pd.DataFrame(np.array(['a', 'b', 'c', 'b', 'a'])), width=150, height=200)
                with colb:
                    st.write("After Encoding")
                    st.dataframe(pd.DataFrame(np.array([0.4, 0.4, 0.2, 0.4, 0.4])), width=200, height=200)
        new_line()

        # INFO
        show_cat = st.checkbox("Show Categorical Features", value=False, key='show_cat')
        if show_cat:
            col1, col2 = st.columns(2)
            col1.dataframe(df.select_dtypes(include=object), height=250, use_container_width=True)
            if len(df.select_dtypes(include=object).columns.tolist()) > 1:
                tmp = df.select_dtypes(include=object)
                tmp = tmp.apply(lambda x: x.unique())
                tmp = tmp.to_frame()
                tmp.columns = ['Unique Values']
                col2.dataframe(tmp, height=250, use_container_width=True)

        # Further Analysis
        further_analysis = st.checkbox("Further Analysis", value=False, key='further_analysis')
        if further_analysis:
            col1, col2 = st.columns([0.5, 1])
            with col1:
                # How many unique values each categorical feature has, as a dataframe
                new_line()
                st.markdown("<h6 align='center'>Number of Unique Values</h6>", unsafe_allow_html=True)
                unique_values = pd.DataFrame(df.select_dtypes(include=object).nunique())
                unique_values.columns = ['# Unique Values']
                unique_values = unique_values.sort_values(by='# Unique Values', ascending=False)
                st.dataframe(unique_values, width=200, height=300)
            with col2:
                # Plot for the count of unique values for the categorical features
                new_line()
                st.markdown("<h6 align='center'>Plot for the Count of Unique Values</h6>", unsafe_allow_html=True)
                unique_values = pd.DataFrame(df.select_dtypes(include=object).nunique())
                unique_values.columns = ['# Unique Values']
                unique_values = unique_values.sort_values(by='# Unique Values', ascending=False)
                unique_values['Feature'] = unique_values.index
                fig = px.bar(unique_values, x='Feature', y='# Unique Values', color='# Unique Values', height=350)
                st.plotly_chart(fig, use_container_width=True)

        # INPUT
        col1, col2 = st.columns(2)
        with col1:
            enc_feat = st.multiselect("Select Features", df.select_dtypes(include=object).columns.tolist(), key='encoding_feat', help="Select the categorical features to encode.")
        with col2:
            encoding = st.selectbox("Select Encoding", ["Select", "Ordinal Encoding", "One Hot Encoding", "Count Frequency Encoding"], key='encoding', help="Select the encoding method.")

        if enc_feat and encoding != "Select":
            new_line()
            col1, col2, col3 = st.columns([1, 0.5, 1])
            if col2.button("Apply", key='encoding_apply', use_container_width=True, help="Click to apply encoding."):
                progress_bar()
                new_line()

                # Ordinal Encoding
                if encoding == "Ordinal Encoding":
                    st.session_state.all_the_process += f"""
# Ordinal Encoding
from sklearn.preprocessing import OrdinalEncoder
encoder = OrdinalEncoder()
cat_cols = {enc_feat}
df[cat_cols] = encoder.fit_transform(df[cat_cols])
"""
                    from sklearn.preprocessing import OrdinalEncoder
                    encoder = OrdinalEncoder()
                    cat_cols = enc_feat
                    df[cat_cols] = encoder.fit_transform(df[cat_cols])
                    st.session_state['df'] = df
                    st.success(f"The Categories of the features **`{enc_feat}`** have been encoded using Ordinal Encoding.")

                # One Hot Encoding
                elif encoding == "One Hot Encoding":
                    st.session_state.all_the_process += f"""
# One Hot Encoding
df = pd.get_dummies(df, columns={enc_feat})
"""
                    df = pd.get_dummies(df, columns=enc_feat)
                    st.session_state['df'] = df
                    st.success(f"The Categories of the features **`{enc_feat}`** have been encoded using One Hot Encoding.")

                # Count Frequency Encoding
                elif encoding == "Count Frequency Encoding":
                    # Each category is replaced by its relative frequency
                    # (count / number of rows), matching the 'After Encoding' example above
                    st.session_state.all_the_process += f"""
# Count Frequency Encoding
df[{enc_feat}] = df[{enc_feat}].apply(lambda x: x.map(x.value_counts() / len(df)))
"""
                    df[enc_feat] = df[enc_feat].apply(lambda x: x.map(x.value_counts() / len(df)))
                    st.session_state['df'] = df
                    st.success(f"The Categories of the features **`{enc_feat}`** have been encoded using Count Frequency Encoding.")

        # Show DataFrame Button
        col1, col2, col3 = st.columns([0.15, 1, 0.15])
        col2.divider()
        col1, col2, col3 = st.columns([1, 0.7, 1])
        with col2:
            show_df = st.button("Show DataFrame", key="cat_show_df", help="Click to show the DataFrame.")
        if show_df:
            st.dataframe(df, use_container_width=True)
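    # Count frequency encoding, worked on the toy column from the explainer above:
    # ['a', 'b', 'c', 'b', 'a'] has counts {a: 2, b: 2, c: 1} over 5 rows, so the
    # encoded values are a -> 0.4, b -> 0.4, c -> 0.2. A minimal sketch:
    def _count_frequency_sketch():
        s = pd.Series(['a', 'b', 'c', 'b', 'a'])
        freq = s.value_counts() / len(s)  # relative frequency of each category
        return s.map(freq)                # 0.4, 0.4, 0.2, 0.4, 0.4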
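    # The Scaling section below offers three methods. A toy comparison on the same
    # column, so the formulas can be checked by hand (illustrative sketch only):
    def _scaler_comparison_sketch():
        from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
        x = pd.DataFrame({'x': [1.0, 2.0, 3.0, 4.0, 100.0]})  # one extreme value
        standard = StandardScaler().fit_transform(x)  # (x - mean) / std, dragged by the outlier
        minmax = MinMaxScaler().fit_transform(x)      # (x - min) / (max - min), squashed into [0, 1]
        robust = RobustScaler().fit_transform(x)      # (x - median) / IQR, barely affected by the outlier
        return standard, minmax, robust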
    # Scaling
    new_line()
    st.markdown("### ⚖️ Scaling", unsafe_allow_html=True)
    new_line()
    with st.expander("Show Scaling"):
        new_line()

        # Scaling Methods
        scaling_methods = st.checkbox("Explain Scaling Methods", value=False, key='scaling_methods')
        if scaling_methods:
            new_line()
            col1, col2, col3 = st.columns(3)
            with col1:
                st.markdown("<h6 align='center'>Standard Scaling</h6>", unsafe_allow_html=True)
                st.latex(r'''z = \frac{x - \mu}{\sigma}''')
                new_line()
                # Typical range of the output of Standard Scaling
                st.latex(r'''z \in [-3, 3]''')
            with col2:
                st.markdown("<h6 align='center'>MinMax Scaling</h6>", unsafe_allow_html=True)
                st.latex(r'''z = \frac{x - min(x)}{max(x) - min(x)}''')
                new_line()
                # Range of the output of MinMax Scaling
                st.latex(r'''z \in [0, 1]''')
            with col3:
                st.markdown("<h6 align='center'>Robust Scaling</h6>", unsafe_allow_html=True)
                # Robust scaling centers on the median and divides by the interquartile range
                st.latex(r'''z = \frac{x - median(x)}{Q_3 - Q_1}''')
                new_line()
                # Typical range of the output of Robust Scaling
                st.latex(r'''z \in [-2, 2]''')

            st.latex(r'''\text{where } z \text{ is the scaled value}''')
            new_line()

        # Ranges for the numeric features
        feat_range = st.checkbox("Further Analysis", value=False, key='feat_range')
        if feat_range:
            new_line()
            st.write("The Ranges for the numeric features:")
            col1, col2, col3 = st.columns([0.05, 1, 0.05])
            with col2:
                st.dataframe(df.describe().T, width=700)
            new_line()

        # INPUT
        new_line()
        new_line()
        col1, col2 = st.columns(2)
        with col1:
            scale_feat = st.multiselect("Select Features", df.select_dtypes(include=np.number).columns.tolist(), help="Select the features to be scaled.")
        with col2:
            scaling = st.selectbox("Select Scaling", ["Select", "Standard Scaling", "MinMax Scaling", "Robust Scaling"], help="Select the scaling method.")

        if scale_feat and scaling != "Select":
            new_line()
            col1, col2, col3 = st.columns([1, 0.5, 1])
            if col2.button("Apply", key='scaling_apply', use_container_width=True, help="Click to apply scaling."):
                progress_bar()

                # Standard Scaling
                if scaling == "Standard Scaling":
                    st.session_state.all_the_process += f"""
# Standard Scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df[{scale_feat}] = pd.DataFrame(scaler.fit_transform(df[{scale_feat}]), columns=df[{scale_feat}].columns)
"""
                    from sklearn.preprocessing import StandardScaler
                    scaler = StandardScaler()
                    df[scale_feat] = pd.DataFrame(scaler.fit_transform(df[scale_feat]), columns=df[scale_feat].columns)
                    st.session_state['df'] = df
                    st.success(f"The Features **`{scale_feat}`** have been scaled using Standard Scaling.")

                # MinMax Scaling
                elif scaling == "MinMax Scaling":
                    st.session_state.all_the_process += f"""
# MinMax Scaling
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df[{scale_feat}] = pd.DataFrame(scaler.fit_transform(df[{scale_feat}]), columns=df[{scale_feat}].columns)
"""
                    from sklearn.preprocessing import MinMaxScaler
                    scaler = MinMaxScaler()
                    df[scale_feat] = pd.DataFrame(scaler.fit_transform(df[scale_feat]), columns=df[scale_feat].columns)
                    st.session_state['df'] = df
                    st.success(f"The Features **`{scale_feat}`** have been scaled using MinMax Scaling.")

                # Robust Scaling
                elif scaling == "Robust Scaling":
                    st.session_state.all_the_process += f"""
# Robust Scaling
from sklearn.preprocessing import RobustScaler
scaler = RobustScaler()
df[{scale_feat}] = pd.DataFrame(scaler.fit_transform(df[{scale_feat}]), columns=df[{scale_feat}].columns)
"""
                    from sklearn.preprocessing import RobustScaler
                    scaler = RobustScaler()
                    df[scale_feat] = pd.DataFrame(scaler.fit_transform(df[scale_feat]), columns=df[scale_feat].columns)
                    st.session_state['df'] = df
                    st.success(f"The Features **`{scale_feat}`** have been scaled using Robust Scaling.")

        # Show DataFrame Button
        col1, col2, col3 = st.columns([0.15, 1, 0.15])
        col2.divider()
        col1, col2, col3 = st.columns([0.9, 0.6, 1])
        with col2:
            show_df = st.button("Show DataFrame", key="scaling_show_df", help="Click to show the DataFrame.")
        if show_df:
            st.dataframe(df, use_container_width=True)
    # Data Transformation
    new_line()
    st.markdown("### 🧬 Data Transformation", unsafe_allow_html=True)
    new_line()
    with st.expander("Show Data Transformation"):
        new_line()

        # Transformation Methods
        trans_methods = st.checkbox("Explain Transformation Methods", key="trans_methods", value=False)
        if trans_methods:
            new_line()
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.markdown("<h6 align='center'>Log Transformation</h6>", unsafe_allow_html=True)
                st.latex(r'''z = log(x)''')
            with col2:
                st.markdown("<h6 align='center'>Square Root Transformation</h6>", unsafe_allow_html=True)
                st.latex(r'''z = \sqrt{x}''')
            with col3:
                st.markdown("<h6 align='center'>Cube Root Transformation</h6>", unsafe_allow_html=True)
                st.latex(r'''z = \sqrt[3]{x}''')
            with col4:
                st.markdown("<h6 align='center'>Exponential Transformation</h6>", unsafe_allow_html=True)
                st.latex(r'''z = e^x''')

        # INPUT
        new_line()
        col1, col2 = st.columns(2)
        with col1:
            trans_feat = st.multiselect("Select Features", df.select_dtypes(include=np.number).columns.tolist(), help="Select the features you want to transform.", key="transformation features")
        with col2:
            trans = st.selectbox("Select Transformation", ["Select", "Log Transformation", "Square Root Transformation", "Cube Root Transformation", "Exponential Transformation"], help="Select the transformation you want to apply.", key="transformation")

        if trans_feat and trans != "Select":
            new_line()
            col1, col2, col3 = st.columns([1, 0.5, 1])
            if col2.button("Apply", key='trans_apply', use_container_width=True, help="Click to apply transformation."):
                progress_bar()

                # Log Transformation
                if trans == "Log Transformation":
                    # np.log1p(x) = log(1 + x) is defined at x = 0, so zero-valued
                    # rows survive the transformation (np.log(0) would be -inf)
                    st.session_state.all_the_process += f"""
# Log Transformation
df[{trans_feat}] = np.log1p(df[{trans_feat}])
"""
                    df[trans_feat] = np.log1p(df[trans_feat])
                    st.session_state['df'] = df
                    st.success("Numerical features have been transformed using Log Transformation.")

                # Square Root Transformation
                elif trans == "Square Root Transformation":
                    st.session_state.all_the_process += f"""
# Square Root Transformation
df[{trans_feat}] = np.sqrt(df[{trans_feat}])
"""
                    df[trans_feat] = np.sqrt(df[trans_feat])
                    st.session_state['df'] = df
                    st.success("Numerical features have been transformed using Square Root Transformation.")

                # Cube Root Transformation
                elif trans == "Cube Root Transformation":
                    st.session_state.all_the_process += f"""
# Cube Root Transformation
df[{trans_feat}] = np.cbrt(df[{trans_feat}])
"""
                    df[trans_feat] = np.cbrt(df[trans_feat])
                    st.session_state['df'] = df
                    st.success("Numerical features have been transformed using Cube Root Transformation.")

                # Exponential Transformation
                elif trans == "Exponential Transformation":
                    st.session_state.all_the_process += f"""
# Exponential Transformation
df[{trans_feat}] = np.exp(df[{trans_feat}])
"""
                    df[trans_feat] = np.exp(df[trans_feat])
                    st.session_state['df'] = df
                    st.success("Numerical features have been transformed using Exponential Transformation.")

        # Show DataFrame Button
        col1, col2, col3 = st.columns([0.15, 1, 0.15])
        col2.divider()
        col1, col2, col3 = st.columns([0.9, 0.6, 1])
        with col2:
            show_df = st.button("Show DataFrame", key="trans_show_df", help="Click to show the DataFrame.")
        if show_df:
            st.dataframe(df, use_container_width=True)
    # Feature Engineering
    new_line()
    st.markdown("### ⚡ Feature Engineering", unsafe_allow_html=True)
    new_line()
    with st.expander("Show Feature Engineering"):

        # Feature Extraction
        new_line()
        st.markdown("#### Feature Extraction", unsafe_allow_html=True)
        new_line()
        col1, col2, col3 = st.columns(3)
        with col1:
            feat1 = st.selectbox("First Feature/s", ["Select"] + df.select_dtypes(include=np.number).columns.tolist(), key="feat_ex1", help="Select the first feature/s you want to extract.")
        with col2:
            op = st.selectbox("Mathematical Operation", ["Select", "Addition +", "Subtraction -", "Multiplication *", "Division /"], key="feat_ex_op", help="Select the mathematical operation you want to apply.")
        with col3:
            feat2 = st.selectbox("Second Feature/s", ["Select"] + df.select_dtypes(include=np.number).columns.tolist(), key="feat_ex2", help="Select the second feature/s you want to extract.")

        if feat1 and op != "Select" and feat2:
            col1, col2, col3 = st.columns(3)
            with col2:
                feat_name = st.text_input("Feature Name", key="feat_name", help="Enter the name of the new feature.")
            col1, col2, col3 = st.columns([1, 0.6, 1])
            new_line()
            if col2.button("Extract Feature"):
                if feat_name == "":
                    feat_name = f"({feat1} {op} {feat2})"

                if op == "Addition +":
                    st.session_state.all_the_process += f"""
# Feature Extraction - Addition
df['{feat_name}'] = df['{feat1}'] + df['{feat2}']
"""
                    df[feat_name] = df[feat1] + df[feat2]
                    st.session_state['df'] = df
                    st.success(f"Feature **`{feat_name}`** has been extracted using Addition.")

                elif op == "Subtraction -":
                    st.session_state.all_the_process += f"""
# Feature Extraction - Subtraction
df['{feat_name}'] = df['{feat1}'] - df['{feat2}']
"""
                    df[feat_name] = df[feat1] - df[feat2]
                    st.session_state['df'] = df
                    st.success(f"Feature **`{feat_name}`** has been extracted using Subtraction.")

                elif op == "Multiplication *":
                    st.session_state.all_the_process += f"""
# Feature Extraction - Multiplication
df['{feat_name}'] = df['{feat1}'] * df['{feat2}']
"""
                    df[feat_name] = df[feat1] * df[feat2]
                    st.session_state['df'] = df
                    st.success(f"Feature **`{feat_name}`** has been extracted using Multiplication.")

                elif op == "Division /":
                    st.session_state.all_the_process += f"""
# Feature Extraction - Division
df['{feat_name}'] = df['{feat1}'] / df['{feat2}']
"""
                    # The new column is the element-wise ratio of the two selected features
                    df[feat_name] = df[feat1] / df[feat2]
                    st.session_state['df'] = df
                    st.success(f"Feature **`{feat_name}`** has been extracted using Division.")

        # Feature Transformation
        st.divider()
        st.markdown("#### Feature Transformation", unsafe_allow_html=True)
        new_line()
        col1, col2, col3 = st.columns(3)
        with col1:
            feat_trans = st.multiselect("Select Feature/s", df.select_dtypes(include=np.number).columns.tolist(), help="Select the features you want to apply a transformation operation on")
        with col2:
            op = st.selectbox("Select Operation", ["Select", "Addition +", "Subtraction -", "Multiplication *", "Division /"], key='feat_trans_op', help="Select the operation you want to apply to the features")
        with col3:
            value = st.text_input("Enter Value", key='feat_trans_val', help="Enter the value to use with the operation")

        if op != "Select" and value != "":
            new_line()
            col1, col2, col3 = st.columns([1, 0.7, 1])
            if col2.button("Transform Feature"):

                if op == "Addition +":
                    st.session_state.all_the_process += f"""
# Feature Transformation - Addition
df[{feat_trans}] = df[{feat_trans}] + {value}
"""
                    df[feat_trans] = df[feat_trans] + float(value)
                    st.session_state['df'] = df
                    st.success(f"The Features **`{feat_trans}`** have been transformed using Addition with the value **`{value}`**.")

                elif op == "Subtraction -":
                    st.session_state.all_the_process += f"""
# Feature Transformation - Subtraction
df[{feat_trans}] = df[{feat_trans}] - {value}
"""
                    df[feat_trans] = df[feat_trans] - float(value)
                    st.session_state['df'] = df
                    st.success(f"The Features **`{feat_trans}`** have been transformed using Subtraction with the value **`{value}`**.")

                elif op == "Multiplication *":
                    st.session_state.all_the_process += f"""
# Feature Transformation - Multiplication
df[{feat_trans}] = df[{feat_trans}] * {value}
"""
                    df[feat_trans] = df[feat_trans] * float(value)
                    st.session_state['df'] = df
                    st.success(f"The Features **`{feat_trans}`** have been transformed using Multiplication with the value **`{value}`**.")

                elif op == "Division /":
                    st.session_state.all_the_process += f"""
# Feature Transformation - Division
df[{feat_trans}] = df[{feat_trans}] / {value}
"""
                    df[feat_trans] = df[feat_trans] / float(value)
                    st.session_state['df'] = df
                    st.success(f"The Features **`{feat_trans}`** have been transformed using Division with the value **`{value}`**.")

        # Feature Selection
        st.divider()
        st.markdown("#### 
Feature Selection", unsafe_allow_html=True) new_line() feat_sel = st.multiselect("Select Feature/s", df.columns.tolist(), key='feat_sel', help="Select the Features you want to keep in the dataset") new_line() if feat_sel: col1, col2, col3 = st.columns([1, 0.7, 1]) if col2.button("Select Features"): st.session_state.all_the_process += f""" # Feature Selection\ndf = df[{feat_sel}] \n """ progress_bar() new_line() df = df[feat_sel] st.session_state['df'] = df st.success(f"The Features **`{feat_sel}`** have been selected.") # Show DataFrame Button col1, col2, col3 = st.columns([0.15,1,0.15]) col2.divider() col1, col2, col3 = st.columns([0.9, 0.6, 1]) with col2: show_df = st.button("Show DataFrame", key="feat_eng_show_df", help="Click to show the DataFrame.") if show_df: st.dataframe(df, use_container_width=True) # Data Splitting st.markdown("### πŸͺš Data Splitting", unsafe_allow_html=True) new_line() with st.expander("Show Data Splitting"): new_line() train_size, val_size, test_size = 0,0,0 col1, col2 = st.columns(2) with col1: target = st.selectbox("Select Target Variable", df.columns.tolist(), key='target', help="Target Variable is the variable that you want to predict.") st.session_state['target_variable'] = target with col2: sets = st.selectbox("Select The Split Sets", ["Select", "Train and Test", "Train, Validation, and Test"], key='sets', help="Train Set is the data used to train the model. Validation Set is the data used to validate the model. Test Set is the data used to test the model. ") st.session_state['split_sets'] = sets if sets != "Select" and target: if sets == "Train, Validation, and Test" : new_line() col1, col2, col3 = st.columns(3) with col1: train_size = st.number_input("Train Size", min_value=0.0, max_value=1.0, value=0.7, step=0.05, key='train_size') train_size = round(train_size, 2) with col2: val_size = st.number_input("Validation Size", min_value=0.0, max_value=1.0, value=0.15, step=0.05, key='val_size') val_size = round(val_size, 2) with col3: test_size = st.number_input("Test Size", min_value=0.0, max_value=1.0, value=0.15, step=0.05, key='test_size') test_size = round(test_size, 2) if float(train_size + val_size + test_size) != 1.0: new_line() st.error(f"The sum of Train, Validation, and Test sizes must be equal to 1.0, your sum is: **train** + **validation** + **test** = **{train_size}** + **{val_size}** + **{test_size}** = **{sum([train_size, val_size, test_size])}**" ) new_line() else: split_button = "" col1, col2, col3 = st.columns([1, 0.5, 1]) with col2: new_line() split_button = st.button("Split Data", use_container_width=True) if split_button: st.session_state.all_the_process += f""" # Data Splitting from sklearn.model_selection import train_test_split X_train, X_rem, y_train, y_rem = train_test_split(df.drop('{target}', axis=1), df['{target}'], train_size={train_size}, random_state=42) X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, train_size= {val_size} / (1.0 - {train_size}),random_state=42) \n """ from sklearn.model_selection import train_test_split X_train, X_rem, y_train, y_rem = train_test_split(df.drop(target, axis=1), df[target], train_size=train_size, random_state=42) X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, train_size= val_size / (1.0 - train_size),random_state=42) st.session_state['X_train'] = X_train st.session_state['X_val'] = X_val st.session_state['X_test'] = X_test st.session_state['y_train'] = y_train st.session_state['y_val'] = y_val st.session_state['y_test'] = y_test col1, col2, col3 = st.columns(3) 
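    # How the two-step split above produces the requested proportions: train_test_split
    # first carves out the train set, then splits the remainder between validation and
    # test, so the second call needs the validation share rescaled by what is left.
    # A sketch of the arithmetic (illustrative only):
    def _split_share_sketch(train_size=0.7, val_size=0.15):
        remainder = 1.0 - train_size      # 0.30 of the data is left after the first split
        val_share = val_size / remainder  # 0.15 / 0.30 = 0.5 of the remainder
        test_share = 1.0 - val_share      # the rest of the remainder becomes the test set
        return val_share, test_share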
if split_button: st.success("Data Splitting Done!") with col1: st.write("Train Set") st.write("X Train Shape: ", X_train.shape) st.write("Y Train Shape: ", y_train.shape) train = pd.concat([X_train, y_train], axis=1) train_csv = train.to_csv(index=False).encode('utf-8') st.download_button("Download Train Set", train_csv, "train.csv", "text/csv", key='train3') with col2: st.write("Validation Set") st.write("X Validation Shape: ", X_val.shape) st.write("Y Validation Shape: ", y_val.shape) val = pd.concat([X_val, y_val], axis=1) val_csv = val.to_csv(index=False).encode('utf-8') st.download_button("Download Validation Set", val_csv, "validation.csv", key='val3') with col3: st.write("Test Set") st.write("X Test Shape: ", X_test.shape) st.write("Y Test Shape: ", y_test.shape) test = pd.concat([X_test, y_test], axis=1) test_csv = test.to_csv(index=False).encode('utf-8') st.download_button("Download Test Set", test_csv, "test.csv", key='test3') elif sets == "Train and Test": new_line() col1, col2 = st.columns(2) with col1: train_size = st.number_input("Train Size", min_value=0.0, max_value=1.0, value=0.7, step=0.05, key='train_size') train_size = round(train_size, 2) with col2: test_size = st.number_input("Test Size", min_value=0.0, max_value=1.0, value=0.30, step=0.05, key='val_size') test_size = round(test_size, 2) if float(train_size + test_size) != 1.0: new_line() st.error(f"The sum of Train, Validation, and Test sizes must be equal to 1.0, your sum is: **train** + **test** = **{train_size}** + **{test_size}** = **{sum([train_size, test_size])}**" ) new_line() else: split_button = "" col1, col2, col3 = st.columns([1, 0.5, 1]) with col2: new_line() split_button = st.button("Split Data") if split_button: st.session_state.all_the_process += f""" # Data Splitting from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(df.drop('{target}', axis=1), df['{target}'], train_size={train_size}, random_state=42) \n """ from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(df.drop(target, axis=1), df[target], train_size=train_size, random_state=42) st.session_state['X_train'] = X_train st.session_state['X_test'] = X_test st.session_state['y_train'] = y_train st.session_state['y_test'] = y_test col1, col2 = st.columns(2) if split_button: st.success("Data Splitting Done!") with col1: st.write("Train Set") st.write("X Train Shape: ", X_train.shape) st.write("Y Train Shape: ", y_train.shape) train = pd.concat([X_train, y_train], axis=1) train_csv = train.to_csv(index=False).encode('utf-8') st.download_button("Download Train Set", train_csv, "train.csv", key='train2') with col2: st.write("Test Set") st.write("X test Shape: ", X_test.shape) st.write("Y test Shape: ", y_test.shape) test = pd.concat([X_test, y_test], axis=1) test_csv = test.to_csv(index=False).encode('utf-8') st.download_button("Download Test Set", test_csv, "test.csv", key='test2') # Building the model new_line() st.markdown("### πŸ€– Building the Model") new_line() problem_type = "" with st.expander(" Model Building"): target, problem_type, model = "", "", "" col1, col2, col3 = st.columns(3) with col1: target = st.selectbox("Target Variable", [st.session_state['target_variable']] , key='target_ml', help="The target variable is the variable that you want to predict") new_line() with col2: problem_type = st.selectbox("Problem Type", ["Select", "Classification", "Regression"], key='problem_type', help="The problem type is the type of problem that you 
want to solve") with col3: if problem_type == "Classification": model = st.selectbox("Model", ["Select", "Logistic Regression", "K-Nearest Neighbors", "Support Vector Machine", "Decision Tree", "Random Forest", "XGBoost", "LightGBM", "CatBoost"], key='model', help="The model is the algorithm that you want to use to solve the problem") new_line() elif problem_type == "Regression": model = st.selectbox("Model", ["Linear Regression", "K-Nearest Neighbors", "Support Vector Machine", "Decision Tree", "Random Forest", "XGBoost", "LightGBM", "CatBoost"], key='model', help="The model is the algorithm that you want to use to solve the problem") new_line() if target != "Select" and problem_type and model: if problem_type == "Classification": if model == "Logistic Regression": col1, col2, col3 = st.columns(3) with col1: penalty = st.selectbox("Penalty (Optional)", ["l2", "l1", "none", "elasticnet"], key='penalty') with col2: solver = st.selectbox("Solver (Optional)", ["lbfgs", "newton-cg", "liblinear", "sag", "saga"], key='solver') with col3: C = st.number_input("C (Optional)", min_value=0.0, max_value=1.0, value=1.0, step=0.05, key='C') col1, col2, col3 = st.columns([1,1,1]) if col2.button("Train Model", use_container_width=True): progress_bar() st.session_state['trained_model_bool'] = True # Train the model st.session_state.all_the_process += f""" # Model Building --> Logistic Regression from sklearn.linear_model import LogisticRegression model = LogisticRegression(penalty='{penalty}', solver='{solver}', C={C}, random_state=42) model.fit(X_train, y_train) \n """ from sklearn.linear_model import LogisticRegression model = LogisticRegression(penalty=penalty, solver=solver, C=C, random_state=42) model.fit(X_train, y_train) st.session_state['trained_model'] = model st.success("Model Trained Successfully!") # save the model import joblib joblib.dump(model, 'model.pkl') # Download the model model_file = open("model.pkl", "rb") model_bytes = model_file.read() col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model') if model == "K-Nearest Neighbors": col1, col2, col3 = st.columns(3) with col1: n_neighbors = st.number_input("N Neighbors **Required**", min_value=1, max_value=100, value=5, step=1, key='n_neighbors') with col2: weights = st.selectbox("Weights (Optional)", ["uniform", "distance"], key='weights') with col3: algorithm = st.selectbox("Algorithm (Optional)", ["auto", "ball_tree", "kd_tree", "brute"], key='algorithm') col1, col2, col3 = st.columns([1,0.7,1]) if col2.button("Train Model", use_container_width=True): progress_bar() st.session_state['trained_model_bool'] = True # Train the model st.session_state.all_the_process += f""" # Model Building --> K-Nearest Neighbors from sklearn.neighbors import KNeighborsClassifier model = KNeighborsClassifier(n_neighbors={n_neighbors}, weights='{weights}', algorithm='{algorithm}') model.fit(X_train, y_train) \n """ from sklearn.neighbors import KNeighborsClassifier model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm) model.fit(X_train, y_train) st.session_state['trained_model'] = model st.success("Model Trained Successfully!") # save the model import joblib joblib.dump(model, 'model.pkl') # Download the model model_file = open("model.pkl", "rb") model_bytes = model_file.read() col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model') if model == "Support Vector Machine": col1, col2, col3 = st.columns(3) with col1: 
kernel = st.selectbox("Kernel (Optional)", ["rbf", "poly", "linear", "sigmoid", "precomputed"], key='kernel') with col2: degree = st.number_input("Degree (Optional)", min_value=1, max_value=100, value=3, step=1, key='degree') with col3: C = st.number_input("C (Optional)", min_value=0.0, max_value=1.0, value=1.0, step=0.05, key='C') col1, col2, col3 = st.columns([1,0.7,1]) if col2.button("Train Model", use_container_width=True): progress_bar() st.session_state['trained_model_bool'] = True # Train the model st.session_state.all_the_process += f""" # Model Building --> Support Vector Machine from sklearn.svm import SVC model = SVC(kernel='{kernel}', degree={degree}, C={C}, random_state=42) model.fit(X_train, y_train) \n """ from sklearn.svm import SVC model = SVC(kernel=kernel, degree=degree, C=C, random_state=42) model.fit(X_train, y_train) st.session_state['trained_model'] = model st.success("Model Trained Successfully!") # save the model import joblib joblib.dump(model, 'model.pkl') # Download the model model_file = open("model.pkl", "rb") model_bytes = model_file.read() col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model') if model == "Decision Tree": col1, col2, col3 = st.columns(3) with col1: criterion = st.selectbox("Criterion (Optional)", ["gini", "entropy", "log_loss"], key='criterion') with col2: splitter = st.selectbox("Splitter (Optional)", ["best", "random"], key='splitter') with col3: min_samples_split = st.number_input("Min Samples Split (Optional)", min_value=1, max_value=100, value=2, step=1, key='min_samples_split') col1, col2, col3 = st.columns([1,0.7,1]) if col2.button("Train Model", use_container_width=True): progress_bar() st.session_state['trained_model_bool'] = True # Train the model st.session_state.all_the_process += f""" # Model Building --> Decision Tree from sklearn.tree import DecisionTreeClassifier model = DecisionTreeClassifier(criterion='{criterion}', splitter='{splitter}', min_samples_split={min_samples_split}, random_state=42) model.fit(X_train, y_train) \n """ from sklearn.tree import DecisionTreeClassifier model = DecisionTreeClassifier(criterion=criterion, splitter=splitter, min_samples_split=min_samples_split, random_state=42) model.fit(X_train, y_train) st.session_state['trained_model'] = model st.success("Model Trained Successfully!") # save the model import joblib joblib.dump(model, 'model.pkl') # Download the model model_file = open("model.pkl", "rb") model_bytes = model_file.read() col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model') if model == "Random Forest": col1, col2, col3 = st.columns(3) with col1: n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=5, key='n_estimators') with col2: criterion = st.selectbox("Criterion (Optional)", ["gini", "entropy", "log_loss"], key='criterion') with col3: min_samples_split = st.number_input("Min Samples Split (Optional)", min_value=1, max_value=100, value=2, step=1, key='min_samples_split') col1, col2, col3 = st.columns([1,0.7,1]) if col2.button("Train Model", use_container_width=True): progress_bar() st.session_state['trained_model_bool'] = True # Train the model st.session_state.all_the_process += f""" # Model Building --> Random Forest from sklearn.ensemble import RandomForestClassifier model = RandomForestClassifier(n_estimators={n_estimators}, criterion='{criterion}', min_samples_split={min_samples_split}, random_state=42) model.fit(X_train, 
y_train) \n """ from sklearn.ensemble import RandomForestClassifier model = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion, min_samples_split=min_samples_split, random_state=42) model.fit(X_train, y_train) st.session_state['trained_model'] = model st.success("Model Trained Successfully!") # save the model import joblib joblib.dump(model, 'model.pkl') # Download the model model_file = open("model.pkl", "rb") model_bytes = model_file.read() col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model') if model == "XGBoost": col1, col2, col3 = st.columns(3) with col1: n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=5, key='n_estimators') with col2: learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.0, max_value=1.0, value=0.1, step=0.05, key='learning_rate') with col3: booster = st.selectbox("Booster (Optional)", ["gbtree", "gblinear", "dart"], key='booster') col1, col2, col3 = st.columns([1,0.7,1]) if col2.button("Train Model"): progress_bar() st.session_state['trained_model_bool'] = True # Train the model st.session_state.all_the_process += f""" # Model Building --> XGBoost from xgboost import XGBClassifier model = XGBClassifier(n_estimators={n_estimators}, learning_rate={learning_rate}, booster='{booster}', random_state=42) model.fit(X_train, y_train) \n """ from xgboost import XGBClassifier model = XGBClassifier(n_estimators=n_estimators, learning_rate=learning_rate, booster=booster, random_state=42) model.fit(X_train, y_train) st.session_state['trained_model'] = model st.success("Model Trained Successfully!") # save the model import joblib joblib.dump(model, 'model.pkl') # Download the model model_file = open("model.pkl", "rb") model_bytes = model_file.read() col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model') if model == 'LightGBM': col1, col2, col3 = st.columns(3) with col1: n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=5, key='n_estimators') with col2: learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.0, max_value=1.0, value=0.1, step=0.05, key='learning_rate') with col3: boosting_type = st.selectbox("Boosting Type (Optional)", ["gbdt", "dart", "goss", "rf"], key='boosting_type') col1, col2, col3 = st.columns([1,0.7,1]) if col2.button("Train Model"): progress_bar() st.session_state['trained_model_bool'] = True # Train the model st.session_state.all_the_process += f""" # Model Building --> LightGBM from lightgbm import LGBMClassifier model = LGBMClassifier(n_estimators={n_estimators}, learning_rate={learning_rate}, boosting_type='{boosting_type}', random_state=42) model.fit(X_train, y_train) \n """ from lightgbm import LGBMClassifier model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, boosting_type=boosting_type, random_state=42) model.fit(X_train, y_train) st.session_state['trained_model'] = model st.success("Model Trained Successfully!") # save the model import joblib joblib.dump(model, 'model.pkl') # Download the model model_file = open("model.pkl", "rb") model_bytes = model_file.read() col2.download_button("Download Model", model_bytes, "model.pkl", key='save_model') if model == 'CatBoost': col1, col2, col3 = st.columns(3) with col1: n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=5, key='n_estimators') with col2: 
    if model == 'CatBoost':
        col1, col2, col3 = st.columns(3)
        with col1:
            n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=5, key='n_estimators')
        with col2:
            learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.0, max_value=1.0, value=0.1, step=0.05, key='learning_rate')
        with col3:
            boosting_type = st.selectbox("Boosting Type (Optional)", ["Ordered", "Plain"], key='boosting_type')

        col1, col2, col3 = st.columns([1, 0.7, 1])
        if col2.button("Train Model"):
            progress_bar()
            st.session_state['trained_model_bool'] = True

            # Train the model
            st.session_state.all_the_process += f"""
# Model Building --> CatBoost
from catboost import CatBoostClassifier
model = CatBoostClassifier(n_estimators={n_estimators}, learning_rate={learning_rate}, boosting_type='{boosting_type}', random_state=42)
model.fit(X_train, y_train)
\n"""
            from catboost import CatBoostClassifier
            model = CatBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate, boosting_type=boosting_type, random_state=42)
            model.fit(X_train, y_train)
            st.session_state['trained_model'] = model
            st.success("Model Trained Successfully!")

            # Save the model
            import joblib
            joblib.dump(model, 'model.pkl')

            # Download the model
            model_file = open("model.pkl", "rb")
            model_bytes = model_file.read()
            col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')

if problem_type == "Regression":

    if model == "Linear Regression":
        col1, col2, col3 = st.columns(3)
        with col1:
            fit_intercept = st.selectbox("Fit Intercept (Optional)", [True, False], key='normalize')
        with col2:
            positive = st.selectbox("Positive (Optional)", [True, False], key='positive')
        with col3:
            copy_x = st.selectbox("Copy X (Optional)", [True, False], key='copy_x')

        col1, col2, col3 = st.columns([1, 0.7, 1])
        if col2.button("Train Model"):
            progress_bar()
            st.session_state['trained_model_bool'] = True

            # Train the model
            st.session_state.all_the_process += f"""
# Model Building --> Linear Regression
from sklearn.linear_model import LinearRegression
model = LinearRegression(fit_intercept={fit_intercept}, positive={positive}, copy_X={copy_x})
model.fit(X_train, y_train)
\n"""
            from sklearn.linear_model import LinearRegression
            model = LinearRegression(fit_intercept=fit_intercept, positive=positive, copy_X=copy_x)
            model.fit(X_train, y_train)
            st.session_state['trained_model'] = model
            st.success("Model Trained Successfully!")

            # Save the model
            import joblib
            joblib.dump(model, 'model.pkl')

            # Download the model
            model_file = open("model.pkl", "rb")
            model_bytes = model_file.read()
            col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')

    if model == "K-Nearest Neighbors":
        col1, col2, col3 = st.columns(3)
        with col1:
            n_neighbors = st.number_input("N Neighbors (Optional)", min_value=1, max_value=100, value=5, step=1, key='n_neighbors')
        with col2:
            weights = st.selectbox("Weights (Optional)", ["uniform", "distance"], key='weights')
        with col3:
            algorithm = st.selectbox("Algorithm (Optional)", ["auto", "ball_tree", "kd_tree", "brute"], key='algorithm')

        col1, col2, col3 = st.columns([1, 0.7, 1])
        if col2.button("Train Model"):
            progress_bar()
            st.session_state['trained_model_bool'] = True

            # Train the model
            st.session_state.all_the_process += f"""
# Model Building --> K-Nearest Neighbors
from sklearn.neighbors import KNeighborsRegressor
model = KNeighborsRegressor(n_neighbors={n_neighbors}, weights='{weights}', algorithm='{algorithm}')
model.fit(X_train, y_train)
\n"""
            from sklearn.neighbors import KNeighborsRegressor
            model = KNeighborsRegressor(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm)
            model.fit(X_train, y_train)
            st.session_state['trained_model'] = model
            st.success("Model Trained Successfully!")

            # Save the model
            import joblib
            joblib.dump(model, 'model.pkl')

            # Download the model
            model_file = open("model.pkl", "rb")
            model_bytes = model_file.read()
            col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
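    # The remaining regression branches mirror their classification
    # counterparts but swap in the *Regressor classes; unlike the classifiers
    # above, they do not fix random_state, so results from the tree-based
    # regressors can vary slightly between runs.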
    if model == "Support Vector Machine":
        col1, col2, col3 = st.columns(3)
        with col1:
            kernel = st.selectbox("Kernel (Optional)", ["linear", "poly", "rbf", "sigmoid", "precomputed"], key='kernel')
        with col2:
            degree = st.number_input("Degree (Optional)", min_value=1, max_value=10, value=3, step=1, key='degree')
        with col3:
            gamma = st.selectbox("Gamma (Optional)", ["scale", "auto"], key='gamma')

        col1, col2, col3 = st.columns([1, 0.7, 1])
        if col2.button("Train Model"):
            progress_bar()
            st.session_state['trained_model_bool'] = True

            # Train the model
            st.session_state.all_the_process += f"""
# Model Building --> Support Vector Machine
from sklearn.svm import SVR
model = SVR(kernel='{kernel}', degree={degree}, gamma='{gamma}')
model.fit(X_train, y_train)
\n"""
            from sklearn.svm import SVR
            model = SVR(kernel=kernel, degree=degree, gamma=gamma)
            model.fit(X_train, y_train)
            st.session_state['trained_model'] = model
            st.success("Model Trained Successfully!")

            # Save the model
            import joblib
            joblib.dump(model, 'model.pkl')

            # Download the model
            model_file = open("model.pkl", "rb")
            model_bytes = model_file.read()
            col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')

    if model == "Decision Tree":
        col1, col2, col3 = st.columns(3)
        with col1:
            criterion = st.selectbox("Criterion (Optional)", ["squared_error", "friedman_mse", "absolute_error", "poisson"], key='criterion')
        with col2:
            splitter = st.selectbox("Splitter (Optional)", ["best", "random"], key='splitter')
        with col3:
            # scikit-learn requires min_samples_split >= 2
            min_samples_split = st.number_input("Min Samples Split (Optional)", min_value=2, max_value=10, value=2, step=1, key='min_samples_split')

        col1, col2, col3 = st.columns([1, 0.7, 1])
        if col2.button("Train Model"):
            progress_bar()
            st.session_state['trained_model_bool'] = True

            # Train the model
            st.session_state.all_the_process += f"""
# Model Building --> Decision Tree
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor(criterion='{criterion}', splitter='{splitter}', min_samples_split={min_samples_split})
model.fit(X_train, y_train)
\n"""
            from sklearn.tree import DecisionTreeRegressor
            model = DecisionTreeRegressor(criterion=criterion, splitter=splitter, min_samples_split=min_samples_split)
            model.fit(X_train, y_train)
            st.session_state['trained_model'] = model
            st.success("Model Trained Successfully!")

            # Save the model
            import joblib
            joblib.dump(model, 'model.pkl')

            # Download the model
            model_file = open("model.pkl", "rb")
            model_bytes = model_file.read()
            col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
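    # NOTE: the criterion names "squared_error", "friedman_mse",
    # "absolute_error", and "poisson" follow scikit-learn >= 1.0 (older
    # releases used "mse"/"mae"), so the app assumes a reasonably recent
    # scikit-learn installation.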
    if model == "Random Forest":
        col1, col2, col3 = st.columns(3)
        with col1:
            n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=1, key='n_estimators')
        with col2:
            criterion = st.selectbox("Criterion (Optional)", ["squared_error", "friedman_mse", "absolute_error", "poisson"], key='criterion')
        with col3:
            min_samples_split = st.number_input("Min Samples Split (Optional)", min_value=2, max_value=10, value=2, step=1, key='min_samples_split')

        col1, col2, col3 = st.columns([1, 0.7, 1])
        if col2.button("Train Model"):
            progress_bar()
            st.session_state['trained_model_bool'] = True

            # Train the model
            st.session_state.all_the_process += f"""
# Model Building --> Random Forest
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators={n_estimators}, criterion='{criterion}', min_samples_split={min_samples_split})
model.fit(X_train, y_train)
\n"""
            from sklearn.ensemble import RandomForestRegressor
            model = RandomForestRegressor(n_estimators=n_estimators, criterion=criterion, min_samples_split=min_samples_split)
            model.fit(X_train, y_train)
            st.session_state['trained_model'] = model
            st.success("Model Trained Successfully!")

            # Save the model
            import joblib
            joblib.dump(model, 'model.pkl')

            # Download the model
            model_file = open("model.pkl", "rb")
            model_bytes = model_file.read()
            col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')

    if model == "XGBoost":
        col1, col2, col3 = st.columns(3)
        with col1:
            n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=1, key='n_estimators')
        with col2:
            learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.0001, max_value=1.0, value=0.1, step=0.1, key='learning_rate')
        with col3:
            booster = st.selectbox("Booster (Optional)", ["gbtree", "gblinear", "dart"], key='booster')

        col1, col2, col3 = st.columns([1, 0.7, 1])
        if col2.button("Train Model"):
            progress_bar()
            st.session_state['trained_model_bool'] = True

            # Train the model
            st.session_state.all_the_process += f"""
# Model Building --> XGBoost
from xgboost import XGBRegressor
model = XGBRegressor(n_estimators={n_estimators}, learning_rate={learning_rate}, booster='{booster}')
model.fit(X_train, y_train)
\n"""
            from xgboost import XGBRegressor
            model = XGBRegressor(n_estimators=n_estimators, learning_rate=learning_rate, booster=booster)
            model.fit(X_train, y_train)
            st.session_state['trained_model'] = model
            st.success("Model Trained Successfully!")

            # Save the model
            import joblib
            joblib.dump(model, 'model.pkl')

            # Download the model
            model_file = open("model.pkl", "rb")
            model_bytes = model_file.read()
            col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')

    if model == "LightGBM":
        col1, col2, col3 = st.columns(3)
        with col1:
            n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=1, key='n_estimators')
        with col2:
            learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.1, max_value=1.0, value=0.1, step=0.1, key='learning_rate')
        with col3:
            boosting_type = st.selectbox("Boosting Type (Optional)", ["gbdt", "dart", "goss", "rf"], key='boosting_type')

        col1, col2, col3 = st.columns([1, 0.7, 1])
        if col2.button("Train Model"):
            progress_bar()
            st.session_state['trained_model_bool'] = True

            # Train the model
            st.session_state.all_the_process += f"""
# Model Building --> LightGBM
from lightgbm import LGBMRegressor
model = LGBMRegressor(n_estimators={n_estimators}, learning_rate={learning_rate}, boosting_type='{boosting_type}')
model.fit(X_train, y_train)
\n"""
            from lightgbm import LGBMRegressor
            model = LGBMRegressor(n_estimators=n_estimators, learning_rate=learning_rate, boosting_type=boosting_type)
            model.fit(X_train, y_train)
            st.session_state['trained_model'] = model
            st.success("Model Trained Successfully!")

            # Save the model
            import joblib
            joblib.dump(model, 'model.pkl')

            # Download the model
            model_file = open("model.pkl", "rb")
            model_bytes = model_file.read()
            col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
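    # NOTE: CatBoost prints per-iteration training logs to stdout by default;
    # passing verbose=0 to CatBoostRegressor would silence that output if it
    # is unwanted inside a Streamlit app.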
== "CatBoost": col1, col2, col3 = st.columns(3) with col1: n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=1, key='n_estimators') with col2: learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.1, max_value=1.0, value=0.1, step=0.1, key='learning_rate') with col3: boosting_type = st.selectbox("Boosting Type (Optional)", ["Ordered", "Plain"], key='boosting_type') col1, col2, col3 = st.columns([1,0.7,1]) if col2.button("Train Model"): progress_bar() st.session_state['trained_model_bool'] = True # Train the model st.session_state.all_the_process += f""" # Model Building --> CatBoost from catboost import CatBoostRegressor model = CatBoostRegressor(n_estimators={n_estimators}, learning_rate={learning_rate}, boosting_type='{boosting_type}') model.fit(X_train, y_train) \n """ from catboost import CatBoostRegressor model = CatBoostRegressor(n_estimators=n_estimators, learning_rate=learning_rate, boosting_type=boosting_type) model.fit(X_train, y_train) st.session_state['trained_model'] = model st.success("Model Trained Successfully!") # save the model import joblib joblib.dump(model, 'model.pkl') # Download the model model_file = open("model.pkl", "rb") model_bytes = model_file.read() col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model') # Evaluation if st.session_state['trained_model_bool']: st.markdown("### πŸ“ˆ Evaluation") new_line() with st.expander("Model Evaluation"): # Load the model import joblib model = joblib.load('model.pkl') if str(model) not in st.session_state.lst_models_predctions: st.session_state.lst_models_predctions.append(str(model)) st.session_state.lst_models.append(str(model)) if str(model) not in st.session_state.models_with_eval.keys(): st.session_state.models_with_eval[str(model)] = [] # Predictions if st.session_state["split_sets"] == "Train, Validation, and Test": st.session_state.all_the_process += f""" # Predictions y_pred_train = model.predict(X_train) y_pred_val = model.predict(X_val) y_pred_test = model.predict(X_test) \n """ y_pred_train = model.predict(X_train) st.session_state.y_pred_train = y_pred_train y_pred_val = model.predict(X_val) st.session_state.y_pred_val = y_pred_val y_pred_test = model.predict(X_test) st.session_state.y_pred_test = y_pred_test elif st.session_state["split_sets"] == "Train and Test": st.session_state.all_the_process += f""" # Predictions y_pred_train = model.predict(X_train) y_pred_test = model.predict(X_test) \n """ y_pred_train = model.predict(X_train) st.session_state.y_pred_train = y_pred_train y_pred_test = model.predict(X_test) st.session_state.y_pred_test = y_pred_test # Choose Evaluation Metric if st.session_state['problem_type'] == "Classification": evaluation_metric = st.multiselect("Evaluation Metric", ["Accuracy", "Precision", "Recall", "F1 Score", "AUC Score"], key='evaluation_metric') elif st.session_state['problem_type'] == "Regression": evaluation_metric = st.multiselect("Evaluation Metric", ["Mean Absolute Error (MAE)", "Mean Squared Error (MSE)", "Root Mean Squared Error (RMSE)", "R2 Score"], key='evaluation_metric') col1, col2, col3 = st.columns([1, 0.6, 1]) st.session_state.show_eval = True if evaluation_metric != []: for metric in evaluation_metric: if metric == "Accuracy": # Check if Accuary is element of the list of that model if "Accuracy" not in st.session_state.models_with_eval[str(model)]: st.session_state.models_with_eval[str(model)].append("Accuracy") if 
st.session_state["split_sets"] == "Train, Validation, and Test": st.session_state.all_the_process += f""" # Evaluation - Accuracy from sklearn.metrics import accuracy_score print("Accuracy Score on Train Set: ", accuracy_score(y_train, y_pred_train)) print("Accuracy Score on Validation Set: ", accuracy_score(y_val, y_pred_val)) print("Accuracy Score on Test Set: ", accuracy_score(y_test, y_pred_test)) \n """ from sklearn.metrics import accuracy_score train_acc = accuracy_score(y_train, y_pred_train) val_acc = accuracy_score(y_val, y_pred_val) test_acc = accuracy_score(y_test, y_pred_test) metrics_df[metric] = [train_acc, val_acc, test_acc] st.session_state['metrics_df'] = metrics_df else: st.session_state.all_the_process += f""" # Evaluation - Accuracy from sklearn.metrics import accuracy_score print("Accuracy Score on Train Set: ", accuracy_score(y_train, y_pred_train)) print("Accuracy Score on Test Set: ", accuracy_score(y_test, y_pred_test)) \n """ from sklearn.metrics import accuracy_score train_acc = accuracy_score(y_train, y_pred_train) test_acc = accuracy_score(y_test, y_pred_test) metrics_df[metric] = [train_acc, test_acc] st.session_state['metrics_df'] = metrics_df elif metric == "Precision": if "Precision" not in st.session_state.models_with_eval[str(model)]: st.session_state.models_with_eval[str(model)].append("Precision") if st.session_state["split_sets"] == "Train, Validation, and Test": st.session_state.all_the_process += f""" # Evaluation - Precision from sklearn.metrics import precision_score print("Precision Score on Train Set: ", precision_score(y_train, y_pred_train)) print("Precision Score on Validation Set: ", precision_score(y_val, y_pred_val)) print("Precision Score on Test Set: ", precision_score(y_test, y_pred_test)) \n """ from sklearn.metrics import precision_score train_prec = precision_score(y_train, y_pred_train) val_prec = precision_score(y_val, y_pred_val) test_prec = precision_score(y_test, y_pred_test) metrics_df[metric] = [train_prec, val_prec, test_prec] st.session_state['metrics_df'] = metrics_df else: st.session_state.all_the_process += f""" # Evaluation - Precision from sklearn.metrics import precision_score print("Precision Score on Train Set: ", precision_score(y_train, y_pred_train)) print("Precision Score on Test Set: ", precision_score(y_test, y_pred_test)) \n """ from sklearn.metrics import precision_score train_prec = precision_score(y_train, y_pred_train) test_prec = precision_score(y_test, y_pred_test) metrics_df[metric] = [train_prec, test_prec] st.session_state['metrics_df'] = metrics_df elif metric == "Recall": if "Recall" not in st.session_state.models_with_eval[str(model)]: st.session_state.models_with_eval[str(model)].append("Recall") if st.session_state["split_sets"] == "Train, Validation, and Test": st.session_state.all_the_process += f""" # Evaluation - Recall from sklearn.metrics import recall_score print("Recall Score on Train Set: ", recall_score(y_train, y_pred_train)) print("Recall Score on Validation Set: ", recall_score(y_val, y_pred_val)) print("Recall Score on Test Set: ", recall_score(y_test, y_pred_test)) \n """ from sklearn.metrics import recall_score train_rec = recall_score(y_train, y_pred_train) val_rec = recall_score(y_val, y_pred_val) test_rec = recall_score(y_test, y_pred_test) metrics_df[metric] = [train_rec, val_rec, test_rec] st.session_state['metrics_df'] = metrics_df else: st.session_state.all_the_process += f""" # Evaluation - Recall from sklearn.metrics import recall_score print("Recall Score on Train Set: ", 
                elif metric == "F1 Score":
                    if "F1 Score" not in st.session_state.models_with_eval[str(model)]:
                        st.session_state.models_with_eval[str(model)].append("F1 Score")

                    if st.session_state["split_sets"] == "Train, Validation, and Test":
                        st.session_state.all_the_process += f"""
# Evaluation - F1 Score
from sklearn.metrics import f1_score
print("F1 Score on Train Set: ", f1_score(y_train, y_pred_train))
print("F1 Score on Validation Set: ", f1_score(y_val, y_pred_val))
print("F1 Score on Test Set: ", f1_score(y_test, y_pred_test))
\n"""
                        from sklearn.metrics import f1_score
                        train_f1 = f1_score(y_train, y_pred_train)
                        val_f1 = f1_score(y_val, y_pred_val)
                        test_f1 = f1_score(y_test, y_pred_test)
                        metrics_df[metric] = [train_f1, val_f1, test_f1]
                        st.session_state['metrics_df'] = metrics_df
                    else:
                        st.session_state.all_the_process += f"""
# Evaluation - F1 Score
from sklearn.metrics import f1_score
print("F1 Score on Train Set: ", f1_score(y_train, y_pred_train))
print("F1 Score on Test Set: ", f1_score(y_test, y_pred_test))
\n"""
                        from sklearn.metrics import f1_score
                        train_f1 = f1_score(y_train, y_pred_train)
                        test_f1 = f1_score(y_test, y_pred_test)
                        metrics_df[metric] = [train_f1, test_f1]
                        st.session_state['metrics_df'] = metrics_df

                elif metric == "AUC Score":
                    if "AUC Score" not in st.session_state.models_with_eval[str(model)]:
                        st.session_state.models_with_eval[str(model)].append("AUC Score")

                    if st.session_state["split_sets"] == "Train, Validation, and Test":
                        st.session_state.all_the_process += f"""
# Evaluation - AUC Score
from sklearn.metrics import roc_auc_score
print("AUC Score on Train Set: ", roc_auc_score(y_train, y_pred_train))
print("AUC Score on Validation Set: ", roc_auc_score(y_val, y_pred_val))
print("AUC Score on Test Set: ", roc_auc_score(y_test, y_pred_test))
\n"""
                        from sklearn.metrics import roc_auc_score
                        train_auc = roc_auc_score(y_train, y_pred_train)
                        val_auc = roc_auc_score(y_val, y_pred_val)
                        test_auc = roc_auc_score(y_test, y_pred_test)
                        metrics_df[metric] = [train_auc, val_auc, test_auc]
                        st.session_state['metrics_df'] = metrics_df
                    else:
                        st.session_state.all_the_process += f"""
# Evaluation - AUC Score
from sklearn.metrics import roc_auc_score
print("AUC Score on Train Set: ", roc_auc_score(y_train, y_pred_train))
print("AUC Score on Test Set: ", roc_auc_score(y_test, y_pred_test))
\n"""
                        from sklearn.metrics import roc_auc_score
                        train_auc = roc_auc_score(y_train, y_pred_train)
                        test_auc = roc_auc_score(y_test, y_pred_test)
                        metrics_df[metric] = [train_auc, test_auc]
                        st.session_state['metrics_df'] = metrics_df
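                # CAVEAT: the AUC above is computed from hard class predictions;
                # the conventional ROC AUC uses probability scores instead, e.g.
                # roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]).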
                elif metric == "Mean Absolute Error (MAE)":
                    if "Mean Absolute Error (MAE)" not in st.session_state.models_with_eval[str(model)]:
                        st.session_state.models_with_eval[str(model)].append("Mean Absolute Error (MAE)")

                    if st.session_state["split_sets"] == "Train, Validation, and Test":
                        st.session_state.all_the_process += f"""
# Evaluation - MAE
from sklearn.metrics import mean_absolute_error
print("MAE on Train Set: ", mean_absolute_error(y_train, y_pred_train))
print("MAE on Validation Set: ", mean_absolute_error(y_val, y_pred_val))
print("MAE on Test Set: ", mean_absolute_error(y_test, y_pred_test))
\n"""
                        from sklearn.metrics import mean_absolute_error
                        train_mae = mean_absolute_error(y_train, y_pred_train)
                        val_mae = mean_absolute_error(y_val, y_pred_val)
                        test_mae = mean_absolute_error(y_test, y_pred_test)
                        metrics_df[metric] = [train_mae, val_mae, test_mae]
                        st.session_state['metrics_df'] = metrics_df
                    else:
                        st.session_state.all_the_process += f"""
# Evaluation - MAE
from sklearn.metrics import mean_absolute_error
print("MAE on Train Set: ", mean_absolute_error(y_train, y_pred_train))
print("MAE on Test Set: ", mean_absolute_error(y_test, y_pred_test))
\n"""
                        from sklearn.metrics import mean_absolute_error
                        train_mae = mean_absolute_error(y_train, y_pred_train)
                        test_mae = mean_absolute_error(y_test, y_pred_test)
                        metrics_df[metric] = [train_mae, test_mae]
                        st.session_state['metrics_df'] = metrics_df

                elif metric == "Mean Squared Error (MSE)":
                    if "Mean Squared Error (MSE)" not in st.session_state.models_with_eval[str(model)]:
                        st.session_state.models_with_eval[str(model)].append("Mean Squared Error (MSE)")

                    if st.session_state["split_sets"] == "Train, Validation, and Test":
                        st.session_state.all_the_process += f"""
# Evaluation - MSE
from sklearn.metrics import mean_squared_error
print("MSE on Train Set: ", mean_squared_error(y_train, y_pred_train))
print("MSE on Validation Set: ", mean_squared_error(y_val, y_pred_val))
print("MSE on Test Set: ", mean_squared_error(y_test, y_pred_test))
\n"""
                        from sklearn.metrics import mean_squared_error
                        train_mse = mean_squared_error(y_train, y_pred_train)
                        val_mse = mean_squared_error(y_val, y_pred_val)
                        test_mse = mean_squared_error(y_test, y_pred_test)
                        metrics_df[metric] = [train_mse, val_mse, test_mse]
                        st.session_state['metrics_df'] = metrics_df
                    else:
                        st.session_state.all_the_process += f"""
# Evaluation - MSE
from sklearn.metrics import mean_squared_error
print("MSE on Train Set: ", mean_squared_error(y_train, y_pred_train))
print("MSE on Test Set: ", mean_squared_error(y_test, y_pred_test))
\n"""
                        from sklearn.metrics import mean_squared_error
                        train_mse = mean_squared_error(y_train, y_pred_train)
                        test_mse = mean_squared_error(y_test, y_pred_test)
                        metrics_df[metric] = [train_mse, test_mse]
                        st.session_state['metrics_df'] = metrics_df

                elif metric == "Root Mean Squared Error (RMSE)":
                    if "Root Mean Squared Error (RMSE)" not in st.session_state.models_with_eval[str(model)]:
                        st.session_state.models_with_eval[str(model)].append("Root Mean Squared Error (RMSE)")

                    if st.session_state["split_sets"] == "Train, Validation, and Test":
                        st.session_state.all_the_process += f"""
# Evaluation - RMSE
from sklearn.metrics import mean_squared_error
print("RMSE on Train Set: ", np.sqrt(mean_squared_error(y_train, y_pred_train)))
print("RMSE on Validation Set: ", np.sqrt(mean_squared_error(y_val, y_pred_val)))
print("RMSE on Test Set: ", np.sqrt(mean_squared_error(y_test, y_pred_test)))
\n"""
                        from sklearn.metrics import mean_squared_error
                        train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
                        val_rmse = np.sqrt(mean_squared_error(y_val, y_pred_val))
                        test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
                        metrics_df[metric] = [train_rmse, val_rmse, test_rmse]
                        st.session_state['metrics_df'] = metrics_df
                    else:
                        st.session_state.all_the_process += f"""
# Evaluation - RMSE
from sklearn.metrics import mean_squared_error
print("RMSE on Train Set: ", np.sqrt(mean_squared_error(y_train, y_pred_train)))
print("RMSE on Test Set: ", np.sqrt(mean_squared_error(y_test, y_pred_test)))
\n"""
                        from sklearn.metrics import mean_squared_error
                        train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
                        test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
                        metrics_df[metric] = [train_rmse, test_rmse]
                        st.session_state['metrics_df'] = metrics_df
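                # RMSE is derived here as the square root of MSE; recent
                # scikit-learn versions can also compute it directly via
                # mean_squared_error(..., squared=False) or, from 1.4 on,
                # root_mean_squared_error.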
                elif metric == "R2 Score":
                    if "R2 Score" not in st.session_state.models_with_eval[str(model)]:
                        st.session_state.models_with_eval[str(model)].append("R2 Score")

                    if st.session_state["split_sets"] == "Train, Validation, and Test":
                        st.session_state.all_the_process += f"""
# Evaluation - R2 Score
from sklearn.metrics import r2_score
print("R2 Score on Train Set: ", r2_score(y_train, y_pred_train))
print("R2 Score on Validation Set: ", r2_score(y_val, y_pred_val))
print("R2 Score on Test Set: ", r2_score(y_test, y_pred_test))
\n"""
                        from sklearn.metrics import r2_score
                        train_r2 = r2_score(y_train, y_pred_train)
                        val_r2 = r2_score(y_val, y_pred_val)
                        test_r2 = r2_score(y_test, y_pred_test)
                        metrics_df[metric] = [train_r2, val_r2, test_r2]
                        st.session_state['metrics_df'] = metrics_df
                    else:
                        st.session_state.all_the_process += f"""
# Evaluation - R2 Score
from sklearn.metrics import r2_score
print("R2 Score on Train Set: ", r2_score(y_train, y_pred_train))
print("R2 Score on Test Set: ", r2_score(y_test, y_pred_test))
\n"""
                        from sklearn.metrics import r2_score
                        train_r2 = r2_score(y_train, y_pred_train)
                        test_r2 = r2_score(y_test, y_pred_test)
                        metrics_df[metric] = [train_r2, test_r2]
                        st.session_state['metrics_df'] = metrics_df

        # Show Evaluation Metric
        if st.session_state.show_eval:
            new_line()
            col1, col2, col3 = st.columns([0.5, 1, 0.5])
            st.markdown("### Evaluation Metric")
            if st.session_state["split_sets"] == "Train, Validation, and Test":
                st.session_state['metrics_df'].index = ['Train', 'Validation', 'Test']
                st.write(st.session_state['metrics_df'])
            elif st.session_state["split_sets"] == "Train and Test":
                st.session_state['metrics_df'].index = ['Train', 'Test']
                st.write(st.session_state['metrics_df'])

            # Show Evaluation Metric Plot
            new_line()
            st.markdown("### Evaluation Metric Plot")
            st.line_chart(st.session_state['metrics_df'])

            # Show ROC Curve as plot
            if "AUC Score" in evaluation_metric:
                # plot_roc_curve was removed in scikit-learn 1.2;
                # RocCurveDisplay.from_estimator is the current equivalent
                from sklearn.metrics import RocCurveDisplay
                st.markdown("### ROC Curve")
                new_line()
                if st.session_state["split_sets"] == "Train, Validation, and Test":
                    col1, col2, col3 = st.columns([0.2, 1, 0.2])
                    fig, ax = plt.subplots()
                    RocCurveDisplay.from_estimator(model, X_train, y_train, ax=ax)
                    RocCurveDisplay.from_estimator(model, X_val, y_val, ax=ax)
                    RocCurveDisplay.from_estimator(model, X_test, y_test, ax=ax)
                    ax.legend(['Train', 'Validation', 'Test'])
                    col2.pyplot(fig)
                elif st.session_state["split_sets"] == "Train and Test":
                    col1, col2, col3 = st.columns([0.2, 1, 0.2])
                    fig, ax = plt.subplots()
                    RocCurveDisplay.from_estimator(model, X_train, y_train, ax=ax)
                    RocCurveDisplay.from_estimator(model, X_test, y_test, ax=ax)
                    ax.legend(['Train', 'Test'])
                    col2.pyplot(fig)

            # Show Confusion Matrix as plot
            if st.session_state['problem_type'] == "Classification":
                from sklearn.metrics import ConfusionMatrixDisplay
                st.markdown("### Confusion Matrix")
                new_line()
                col1, col2, col3 = st.columns([0.2, 1, 0.2])
                fig, ax = plt.subplots()
                ConfusionMatrixDisplay.from_predictions(y_test, y_pred_test, ax=ax)
                col2.pyplot(fig)

    st.divider()
    col1, col2, col3, col4 = st.columns(4, gap='small')
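    # Post-training action bar: preview the working dataframe, download it as
    # CSV, show the auto-generated script accumulated in all_the_process, or
    # reset the app state.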
col1.button("🎬 Show df", use_container_width=True): new_line() st.subheader(" 🎬 Show The Dataframe") st.write("The dataframe is the dataframe that is used on this application to build the Machine Learning model. You can see the dataframe below πŸ‘‡") new_line() st.dataframe(df, use_container_width=True) st.session_state.df.to_csv("df.csv", index=False) df_file = open("df.csv", "rb") df_bytes = df_file.read() if col2.download_button("πŸ“Œ Download df", df_bytes, "df.csv", key='save_df', use_container_width=True): st.success("Downloaded Successfully!") if col3.button("πŸ’» Code", use_container_width=True): new_line() st.subheader("πŸ’» The Code") st.write("The code below is the code that is used to build the model. It is the code that is generated by the app. You can copy the code and use it in your own project πŸ˜‰") new_line() st.code(st.session_state.all_the_process, language='python') if col4.button("β›” Reset", use_container_width=True): new_line() st.subheader("β›” Reset") st.write("Click the button below to reset the app and start over again") new_line() st.session_state.reset_1 = True if st.session_state.reset_1: col1, col2, col3 = st.columns(3) if col2.button("β›” Reset", use_container_width=True, key='reset'): st.session_state.df = None st.session_state.clear() st.experimental_rerun()