# Click-Analyst / app.py
# Importing Libraries
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import time
from PIL import Image
from wordcloud import WordCloud
# Config
page_icon = Image.open("./assets/logo.png")
st.set_page_config(layout="centered", page_title="Click Analyst", page_icon=page_icon)
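# Streamlit reruns this script from top to bottom on every user interaction,
# so anything that must survive a rerun (data, splits, trained models) is
# kept in st.session_state and initialized exactly once below.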
# Initial State
def initial_state():
if 'df' not in st.session_state:
st.session_state['df'] = None
if 'X_train' not in st.session_state:
st.session_state['X_train'] = None
if 'X_test' not in st.session_state:
st.session_state['X_test'] = None
if 'y_train' not in st.session_state:
st.session_state['y_train'] = None
if 'y_test' not in st.session_state:
st.session_state['y_test'] = None
if 'X_val' not in st.session_state:
st.session_state['X_val'] = None
if 'y_val' not in st.session_state:
st.session_state['y_val'] = None
if "model" not in st.session_state:
st.session_state['model'] = None
if 'trained_model' not in st.session_state:
st.session_state['trained_model'] = False
if "trained_model_bool" not in st.session_state:
st.session_state['trained_model_bool'] = False
if "problem_type" not in st.session_state:
st.session_state['problem_type'] = None
if "metrics_df" not in st.session_state:
st.session_state['metrics_df'] = pd.DataFrame()
if "is_train" not in st.session_state:
st.session_state['is_train'] = False
if "is_test" not in st.session_state:
st.session_state['is_test'] = False
if "is_val" not in st.session_state:
st.session_state['is_val'] = False
if "show_eval" not in st.session_state:
st.session_state['show_eval'] = False
if "all_the_process" not in st.session_state:
st.session_state['all_the_process'] = ""
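# 'all_the_process' accumulates a code snippet for every step the user applies,
# gradually building a reproducible pandas/scikit-learn script of the session.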
if "all_the_process_predictions" not in st.session_state:
st.session_state['all_the_process_predictions'] = False
if 'y_pred_train' not in st.session_state:
st.session_state['y_pred_train'] = None
if 'y_pred_test' not in st.session_state:
st.session_state['y_pred_test'] = None
if 'y_pred_val' not in st.session_state:
st.session_state['y_pred_val'] = None
if 'uploading_way' not in st.session_state:
st.session_state['uploading_way'] = None
if "lst_models" not in st.session_state:
st.session_state["lst_models"] = []
if "lst_models_predctions" not in st.session_state:
st.session_state["lst_models_predctions"] = []
if "models_with_eval" not in st.session_state:
st.session_state["models_with_eval"] = dict()
if "reset_1" not in st.session_state:
st.session_state["reset_1"] = False
initial_state()
# New Line
def new_line(n=1):
for i in range(n):
st.write("\n")
# Load Data
@st.cache_data
def load_data(upd_file):
# Read CSV or Excel file
if upd_file.name.endswith('.csv'):
return pd.read_csv(upd_file)
elif upd_file.name.endswith('.xlsx') or upd_file.name.endswith('.xls'):
return pd.read_excel(upd_file)
else:
raise ValueError("Unsupported file format. Only CSV and Excel files are supported.")
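# Caching note: with @st.cache_data, load_data is memoized on its input,
# so reruns of the script generally do not re-parse the same file.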
# Progress Bar
def progress_bar():
my_bar = st.progress(0)
for percent_complete in range(100):
time.sleep(0.0002)
my_bar.progress(percent_complete + 1)
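# The bar is purely cosmetic: 100 steps at 0.0002 s each finish in about 0.02 s.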
# Logo
col1, col2, col3 = st.columns([0.25,1,0.25])
col2.image("./assets/logo.png", use_column_width=True)
new_line(2)
# Description
st.markdown("""Welcome to Click Analytics! πŸš€
Dive right into the future of data with our user-friendly platform designed for everyoneβ€”no coding or machine learning experience required!
With just a few clicks, you can start preparing your data, training cutting-edge models, and uncovering valuable insights.
Whether you're a data enthusiast or a seasoned analyst, Click Analytics empowers you to effortlessly create, analyze, and explore.
What are you waiting for? Start building your very own analytics and models today and see what decisions you can empower with your data!!""", unsafe_allow_html=True)
st.divider()
# Dataframe selection
st.markdown("<h2 align='center'> <b> Getting Started", unsafe_allow_html=True)
new_line(1)
st.write("The first step is to upload your data. You can upload your data in three ways: **Upload File**, **Select from Ours**, and **Write URL**. In all ways the data should be a csv file and should not exceed 200 MB.")
new_line(1)
# Uploading Way
uploading_way = st.session_state.uploading_way
col1, col2, col3 = st.columns(3,gap='large')
# Upload
def upload_click(): st.session_state.uploading_way = "upload"
col1.markdown("<h5 align='center'> Upload File", unsafe_allow_html=True)
col1.button("Upload File", key="upload_file", use_container_width=True, on_click=upload_click)
# URL
def url_click(): st.session_state.uploading_way = "url"
col3.markdown("<h5 align='center'> Write URL", unsafe_allow_html=True)
col3.button("Write URL", key="write_url", use_container_width=True, on_click=url_click)
# No Data
if st.session_state.df is None:
# Upload
if uploading_way == "upload":
uploaded_file = st.file_uploader("Upload the Dataset", type=["csv", "xlsx", "xls"])
if uploaded_file:
try:
df = load_data(uploaded_file)
st.session_state.df = df
except Exception as e:
st.error(f"Error loading the file: {e}")
# URL
elif uploading_way == "url":
url = st.text_input("Enter URL")
if url:
try:
df = pd.read_csv(url)
st.session_state.df = df
except Exception as e:
st.error(f"Error loading the file from the URL: {e}")
# Sidebar
with st.sidebar:
st.image("./assets/logo.png", use_column_width=True)
# Dataframe
if st.session_state.df is not None:
# Re-initialize the variables from the state
df = st.session_state.df
X_train = st.session_state.X_train
X_test = st.session_state.X_test
y_train = st.session_state.y_train
y_test = st.session_state.y_test
X_val = st.session_state.X_val
y_val = st.session_state.y_val
trained_model = st.session_state.trained_model
is_train = st.session_state.is_train
is_test = st.session_state.is_test
is_val = st.session_state.is_val
model = st.session_state.model
show_eval = st.session_state.show_eval
y_pred_train = st.session_state.y_pred_train
y_pred_test = st.session_state.y_pred_test
y_pred_val = st.session_state.y_pred_val
metrics_df = st.session_state.metrics_df
st.divider()
new_line()
# EDA
st.markdown("### πŸ•΅οΈβ€β™‚οΈ Exploratory Data Analysis", unsafe_allow_html=True)
new_line()
with st.expander("Show EDA"):
new_line()
# Head
head = st.checkbox("Show First 5 Rows", value=False)
new_line()
if head:
st.dataframe(df.head(), use_container_width=True)
# Tail
tail = st.checkbox("Show Last 5 Rows", value=False)
new_line()
if tail:
st.dataframe(df.tail(), use_container_width=True)
# Shape
shape = st.checkbox("Show Shape", value=False)
new_line()
if shape:
st.write(f"This DataFrame has **{df.shape[0]} rows** and **{df.shape[1]} columns**.")
new_line()
# Columns
columns = st.checkbox("Show Columns", value=False)
new_line()
if columns:
st.write(pd.DataFrame(df.columns, columns=['Columns']).T)
new_line()
if st.checkbox("Check Data Types", value=False):
st.write(df.dtypes)
new_line()
new_line()
if st.checkbox("Show Skewness and Kurtosis", value=False):
skew_kurt = pd.DataFrame(data={
'Skewness': df.skew(),
'Kurtosis': df.kurtosis()
})
st.write(skew_kurt)
new_line()
new_line()
# Describe Numerical
describe = st.checkbox("Show Description **(Numerical Features)**", value=False)
new_line()
if describe:
st.dataframe(df.describe(), use_container_width=True)
new_line()
if st.checkbox("Unique Value Count", value=False):
unique_counts = pd.DataFrame(df.nunique()).rename(columns={0: 'Unique Count'})
st.write(unique_counts)
new_line()
new_line()
# Describe Categorical
describe_cat = st.checkbox("Show Description **(Categorical Features)**", value=False)
new_line()
if describe_cat:
if df.select_dtypes(include='object').columns.tolist():
st.dataframe(df.describe(include=['object']), use_container_width=True)
new_line()
else:
st.info("There are no categorical features.")
new_line()
# Correlation matrix using a seaborn heatmap
corr = st.checkbox("Show Correlation", value=False)
new_line()
if corr:
numeric_df = df.select_dtypes(include=np.number)
if numeric_df.columns.tolist():
fig, ax = plt.subplots()
sns.heatmap(numeric_df.corr(), cmap='Blues', annot=True, ax=ax)
st.pyplot(fig)
new_line()
else:
st.info("There are no numerical features.")
# Missing Values
missing = st.checkbox("Show Missing Values", value=False)
new_line()
if missing:
col1, col2 = st.columns([0.4,1])
with col1:
st.markdown("<h6 align='center'> Number of Null Values", unsafe_allow_html=True)
st.dataframe(df.isnull().sum().sort_values(ascending=False),height=350, use_container_width=True)
with col2:
st.markdown("<h6 align='center'> Plot for the Null Values ", unsafe_allow_html=True)
null_values = df.isnull().sum()
null_values = null_values[null_values > 0]
null_values = null_values.sort_values(ascending=False)
null_values = null_values.to_frame()
null_values.columns = ['Count']
null_values.index.names = ['Feature']
null_values['Feature'] = null_values.index
fig = px.bar(null_values, x='Feature', y='Count', color='Count', height=350)
st.plotly_chart(fig, use_container_width=True)
new_line()
# Delete Columns
delete = st.checkbox("Delete Columns", value=False)
new_line()
if delete:
col_to_delete = st.multiselect("Select Columns to Delete", df.columns)
new_line()
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Delete", use_container_width=True):
st.session_state.all_the_process += f"""
# Delete Columns
df.drop(columns={col_to_delete}, inplace=True)
\n """
progress_bar()
df.drop(columns=col_to_delete, inplace=True)
st.session_state.df = df
st.success(f"The Columns **`{col_to_delete}`** are Deleted Successfully!")
# Show DataFrame Button
col1, col2, col3 = st.columns([0.15,1,0.15])
col2.divider()
col1, col2, col3 = st.columns([1, 0.7, 1])
if col2.button("Show DataFrame", use_container_width=True):
st.dataframe(df, use_container_width=True)
#start point
# Histograms for Numerical Features
hist = st.checkbox("Show Histograms", value=False)
new_line()
if hist:
numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
col_for_hist = st.selectbox("Select Column for Histogram", options=numeric_cols)
num_bins = st.slider("Select Number of Bins", min_value=10, max_value=100, value=30)
fig, ax = plt.subplots()
df[col_for_hist].hist(bins=num_bins, ax=ax, color='skyblue')
ax.set_title(f'Histogram of {col_for_hist}')
st.pyplot(fig)
new_line()
# Box Plots for Numerical Features
boxplot = st.checkbox("Show Box Plots", value=False)
new_line()
if boxplot:
numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
col_for_box = st.selectbox("Select Column for Box Plot", options=numeric_cols)
fig, ax = plt.subplots()
df.boxplot(column=[col_for_box], ax=ax)
ax.set_title(f'Box Plot of {col_for_box}')
st.pyplot(fig)
new_line()
# Scatter Plots for Numerical Features
scatter = st.checkbox("Show Scatter Plots", value=False)
new_line()
if scatter:
numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
x_col = st.selectbox("Select X-axis Column", options=numeric_cols, index=0)
y_col = st.selectbox("Select Y-axis Column", options=numeric_cols, index=1 if len(numeric_cols) > 1 else 0)
fig, ax = plt.subplots()
df.plot(kind='scatter', x=x_col, y=y_col, ax=ax, color='red')
ax.set_title(f'Scatter Plot between {x_col} and {y_col}')
st.pyplot(fig)
new_line()
# Pair Plots for Numerical Features
pairplot = st.checkbox("Show Pair Plots", value=False)
new_line()
if pairplot:
pair_grid = sns.pairplot(df.select_dtypes(include=np.number))
st.pyplot(pair_grid.figure)
# Count Plots for Categorical Data
countplot = st.checkbox("Show Count Plots", value=False)
new_line()
if countplot:
categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
col_for_count = st.selectbox("Select Column for Count Plot", options=categorical_cols)
fig, ax = plt.subplots()
sns.countplot(x=df[col_for_count], data=df, ax=ax)
ax.set_title(f'Count Plot of {col_for_count}')
st.pyplot(fig)
new_line()
# Pie Charts for Categorical Data
pie_chart = st.checkbox("Show Pie Charts", value=False)
new_line()
if pie_chart:
categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
col_for_pie = st.selectbox("Select Column for Pie Chart", options=categorical_cols)
pie_data = df[col_for_pie].value_counts()
fig, ax = plt.subplots()
ax.pie(pie_data, labels=pie_data.index, autopct='%1.1f%%', startangle=90)
ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
ax.set_title(f'Pie Chart of {col_for_pie}')
st.pyplot(fig)
new_line()
new_line()
if st.checkbox("Identify Outliers", value=False):
numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
col_for_outliers = st.selectbox("Select Column to Check Outliers", options=numeric_cols)
fig, ax = plt.subplots()
sns.boxplot(x=df[col_for_outliers], ax=ax)
ax.set_title(f'Outliers in {col_for_outliers}')
st.pyplot(fig)
new_line()
new_line()
if st.checkbox("Show Cross-tabulations", value=False):
categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
x_col = st.selectbox("Select X-axis Column for Cross-tab", options=categorical_cols, index=0)
y_col = st.selectbox("Select Y-axis Column for Cross-tab", options=categorical_cols, index=1 if len(categorical_cols) > 1 else 0)
cross_tab = pd.crosstab(df[x_col], df[y_col])
st.write(cross_tab)
new_line()
new_line()
if st.checkbox("Segmented Analysis", value=False):
segments = st.selectbox("Select Segment", options=df.columns)
segment_values = df[segments].dropna().unique()
selected_segment = st.selectbox("Choose Segment Value", options=segment_values)
segmented_data = df[df[segments] == selected_segment]
st.write(segmented_data)
new_line()
new_line()
if st.checkbox("Temporal Analysis", value=False):
date_col_options = df.select_dtypes(include=[np.datetime64]).columns.tolist()
value_col_options = df.select_dtypes(include=np.number).columns.tolist()
if not date_col_options:
st.error("No datetime columns found in the DataFrame.")
elif not value_col_options:
st.error("No numeric columns found in the DataFrame.")
else:
date_col = st.selectbox("Select Date Column", options=date_col_options)
value_col = st.selectbox("Select Value Column", options=value_col_options)
fig, ax = plt.subplots()
df.set_index(date_col)[value_col].plot(ax=ax)
ax.set_title(f'Trend Over Time - {value_col}')
st.pyplot(fig)
new_line()
if st.checkbox("Show Word Cloud", value=False):
# Get the list of object-type columns for user to choose from
text_col_options = df.select_dtypes(include=['object', 'string']).columns.tolist()
if text_col_options:
# Let the user select a text column
text_col = st.selectbox("Select Text Column for Word Cloud", options=text_col_options)
# Collect text data, dropping NA values and joining them into a single string
text_data = ' '.join(df[text_col].dropna().astype(str)).strip()
if text_data: # Check if there is any text data to use
try:
wordcloud = WordCloud(width=800, height=400).generate(text_data)
fig, ax = plt.subplots()
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
st.pyplot(fig)
except ValueError as e:
st.error("Failed to generate word cloud: " + str(e))
else:
st.error("No words available to create a word cloud. Please check the selected text data.")
else:
st.error("No suitable text columns found for creating a word cloud.")
new_line()
# Interactive Data Tables
interactive_table = st.checkbox("Show Interactive Data Table", value=False)
new_line()
if interactive_table:
st.dataframe(df)
new_line()
# Missing Values
new_line()
st.markdown("### ⚠️ Missing Values", unsafe_allow_html=True)
new_line()
with st.expander("Show Missing Values"):
# Further Analysis
new_line()
missing = st.checkbox("Further Analysis", value=False, key='missing')
new_line()
if missing:
col1, col2 = st.columns(2, gap='medium')
with col1:
# Number of Null Values
st.markdown("<h6 align='center'> Number of Null Values", unsafe_allow_html=True)
st.dataframe(df.isnull().sum().sort_values(ascending=False), height=300, use_container_width=True)
with col2:
# Percentage of Null Values
st.markdown("<h6 align='center'> Percentage of Null Values", unsafe_allow_html=True)
null_percentage = pd.DataFrame(round(df.isnull().sum() / df.shape[0] * 100, 2))
null_percentage.columns = ['Percentage']
null_percentage = null_percentage.sort_values(by='Percentage', ascending=False)
null_percentage['Percentage'] = null_percentage['Percentage'].map('{:.2f} %'.format)
st.dataframe(null_percentage, height=300, use_container_width=True)
# Heatmap
col1, col2, col3 = st.columns([0.1,1,0.1])
with col2:
new_line()
st.markdown("<h6 align='center'> Plot for the Null Values ", unsafe_allow_html=True)
null_values = df.isnull().sum()
null_values = null_values[null_values > 0]
null_values = null_values.sort_values(ascending=False)
null_values = null_values.to_frame()
null_values.columns = ['Count']
null_values.index.names = ['Feature']
null_values['Feature'] = null_values.index
fig = px.bar(null_values, x='Feature', y='Count', color='Count', height=350)
st.plotly_chart(fig, use_container_width=True)
# INPUT
col1, col2 = st.columns(2)
with col1:
missing_df_cols = df.columns[df.isnull().any()].tolist()
if missing_df_cols:
add_opt = ["All Numerical Features (ClickML Feature)", "All Categorical Feature (ClickML Feature)"]
else:
add_opt = []
fill_feat = st.multiselect("Select Features", missing_df_cols + add_opt , help="Select Features to fill missing values")
with col2:
strategy = st.selectbox("Select Missing Values Strategy", ["Select", "Drop Rows", "Drop Columns", "Fill with Mean", "Fill with Median", "Fill with Mode (Most Frequent)", "Fill with ffill, bfill"], help="Select Missing Values Strategy")
if fill_feat and strategy != "Select":
new_line()
col1, col2, col3 = st.columns([1,0.5,1])
if col2.button("Apply", use_container_width=True, key="missing_apply", help="Apply Missing Values Strategy"):
progress_bar()
# All Numerical Features
if "All Numerical Features (ClickML Feature)" in fill_feat:
fill_feat.remove("All Numerical Features (ClickML Feature)")
fill_feat += df.select_dtypes(include=np.number).columns.tolist()
# All Categorical Features
if "All Categorical Feature (ClickML Feature)" in fill_feat:
fill_feat.remove("All Categorical Feature (ClickML Feature)")
fill_feat += df.select_dtypes(include=np.object).columns.tolist()
# Drop Rows
if strategy == "Drop Rows":
st.session_state.all_the_process += f"""
# Drop Rows
df[{fill_feat}] = df[{fill_feat}].dropna(axis=0)
\n """
df[fill_feat] = df[fill_feat].dropna(axis=0)
st.session_state['df'] = df
st.success(f"Missing values have been dropped from the DataFrame for the features **`{fill_feat}`**.")
# Drop Columns
elif strategy == "Drop Columns":
st.session_state.all_the_process += f"""
# Drop Columns
df[{fill_feat}] = df[{fill_feat}].dropna(axis=1)
\n """
df[fill_feat] = df[fill_feat].dropna(axis=1)
st.session_state['df'] = df
st.success(f"The Columns **`{fill_feat}`** have been dropped from the DataFrame.")
# Fill with Mean
elif strategy == "Fill with Mean":
st.session_state.all_the_process += f"""
# Fill with Mean
from sklearn.impute import SimpleImputer
num_imputer = SimpleImputer(strategy='mean')
df[{fill_feat}] = num_imputer.fit_transform(df[{fill_feat}])
\n """
from sklearn.impute import SimpleImputer
num_imputer = SimpleImputer(strategy='mean')
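# SimpleImputer.fit_transform returns a NumPy array; assigning it into
# df[fill_feat] keeps the DataFrame's original index and column names.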
df[fill_feat] = num_imputer.fit_transform(df[fill_feat])
null_cat = df[missing_df_cols].select_dtypes(include='object').columns.tolist()
if null_cat:
st.session_state.all_the_process += f"""
# Fill with Mode
from sklearn.impute import SimpleImputer
cat_imputer = SimpleImputer(strategy='most_frequent')
df[{null_cat}] = cat_imputer.fit_transform(df[{null_cat}])
\n """
cat_imputer = SimpleImputer(strategy='most_frequent')
df[null_cat] = cat_imputer.fit_transform(df[null_cat])
st.session_state['df'] = df
if df.select_dtypes(include='object').columns.tolist():
st.success(f"The Columns **`{fill_feat}`** have been filled with the mean, and the categorical columns **`{null_cat}`** have been filled with the mode.")
else:
st.success(f"The Columns **`{fill_feat}`** have been filled with the mean.")
# Fill with Median
elif strategy == "Fill with Median":
st.session_state.all_the_process += f"""
# Fill with Median
from sklearn.impute import SimpleImputer
num_imputer = SimpleImputer(strategy='median')
df[{fill_feat}] = pd.DataFrame(num_imputer.fit_transform(df[{fill_feat}]), columns=df[{fill_feat}].columns)
\n """
from sklearn.impute import SimpleImputer
num_imputer = SimpleImputer(strategy='median')
df[fill_feat] = pd.DataFrame(num_imputer.fit_transform(df[fill_feat]), columns=df[fill_feat].columns)
null_cat = df[missing_df_cols].select_dtypes(include='object').columns.tolist()
if null_cat:
st.session_state.all_the_process += f"""
# Fill with Mode
from sklearn.impute import SimpleImputer
cat_imputer = SimpleImputer(strategy='most_frequent')
df[{null_cat}] = cat_imputer.fit_transform(df[{null_cat}])
\n """
cat_imputer = SimpleImputer(strategy='most_frequent')
df[null_cat] = cat_imputer.fit_transform(df[null_cat])
st.session_state['df'] = df
if df.select_dtypes(include='object').columns.tolist():
st.success(f"The Columns **`{fill_feat}`** have been filled with the median, and the categorical columns **`{null_cat}`** have been filled with the mode.")
else:
st.success(f"The Columns **`{fill_feat}`** have been filled with the median.")
# Fill with Mode
elif strategy == "Fill with Mode (Most Frequent)":
st.session_state.all_the_process += f"""
# Fill with Mode
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='most_frequent')
df[{fill_feat}] = imputer.fit_transform(df[{fill_feat}])
\n """
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='most_frequent')
df[fill_feat] = imputer.fit_transform(df[fill_feat])
st.session_state['df'] = df
st.success(f"The Columns **`{fill_feat}`** has been filled with the Mode.")
# Fill with ffill, bfill
elif strategy == "Fill with ffill, bfill":
st.session_state.all_the_process += f"""
# Fill with ffill, bfill
df[{fill_feat}] = df[{fill_feat}].fillna(method='ffill').fillna(method='bfill')
\n """
df = df.fillna(method='ffill').fillna(method='bfill')
st.session_state['df'] = df
st.success("The DataFrame has been filled with ffill, bfill.")
# Show DataFrame Button
col1, col2, col3 = st.columns([0.15,1,0.15])
col2.divider()
col1, col2, col3 = st.columns([0.9, 0.6, 1])
with col2:
show_df = st.button("Show DataFrame", key="missing_show_df")
if show_df:
st.dataframe(df, use_container_width=True)
# Encoding
new_line()
st.markdown("### πŸ”  Handling Categorical Data", unsafe_allow_html=True)
new_line()
with st.expander("Show Encoding"):
new_line()
# Explain
exp_enc = st.checkbox("Explain Encoding", value=False, key='exp_enc')
if exp_enc:
col1, col2 = st.columns([0.8,1])
with col1:
st.markdown("<h6 align='center'>Ordinal Encoding</h6>", unsafe_allow_html=True)
cola, colb = st.columns(2)
with cola:
st.write("Before Encoding")
st.dataframe(pd.DataFrame(np.array(['a','b','c','b','a']) ),width=120, height=200)
with colb:
st.write("After Encoding")
st.dataframe(pd.DataFrame(np.array([0,1,2,1,0])),width=120, height=200)
with col2:
st.markdown("<h6 align='center'>One Hot Encoding</h6>", unsafe_allow_html=True)
cola, colb = st.columns([0.7,1])
with cola:
st.write("Before Encoding")
st.dataframe(pd.DataFrame(np.array(['a','b','c', 'b','a']) ),width=150, height=200)
with colb:
st.write("After Encoding")
st.dataframe(pd.DataFrame(np.array([[1,0,0],[0,1,0],[0,0,1],[0,1,0],[1,0,0]])),width=200, height=200)
col1, col2, col3 = st.columns([0.5,1,0.5])
with col2:
new_line()
st.markdown("<h6 align='center'>Count Frequency Encoding</h6>", unsafe_allow_html=True)
cola, colb = st.columns([0.8,1])
with cola:
st.write("Before Encoding")
st.dataframe(pd.DataFrame(np.array(['a','b','c', 'b','a']) ),width=150, height=200)
with colb:
st.write("After Encoding")
st.dataframe(pd.DataFrame(np.array([0.4,0.4,0.2,0.4,0.4])),width=200, height=200)
new_line()
# INFO
show_cat = st.checkbox("Show Categorical Features", value=False, key='show_cat')
# new_line()
if show_cat:
col1, col2 = st.columns(2)
col1.dataframe(df.select_dtypes(include='object'), height=250, use_container_width=True )
if len(df.select_dtypes(include='object').columns.tolist()) >= 1:
tmp = df.select_dtypes(include='object')
tmp = tmp.apply(lambda x: x.unique())
tmp = tmp.to_frame()
tmp.columns = ['Unique Values']
col2.dataframe(tmp, height=250, use_container_width=True )
# Further Analysis
# new_line()
further_analysis = st.checkbox("Further Analysis", value=False, key='further_analysis')
if further_analysis:
col1, col2 = st.columns([0.5,1])
with col1:
# Each categorical feature has how many unique values as dataframe
new_line()
st.markdown("<h6 align='left'> Number of Unique Values", unsafe_allow_html=True)
unique_values = pd.DataFrame(df.select_dtypes(include='object').nunique())
unique_values.columns = ['# Unique Values']
unique_values = unique_values.sort_values(by='# Unique Values', ascending=False)
st.dataframe(unique_values, width=200, height=300)
with col2:
# Plot for the count of unique values for the categorical features
new_line()
st.markdown("<h6 align='center'> Plot for the Count of Unique Values ", unsafe_allow_html=True)
unique_values = pd.DataFrame(df.select_dtypes(include='object').nunique())
unique_values.columns = ['# Unique Values']
unique_values = unique_values.sort_values(by='# Unique Values', ascending=False)
unique_values['Feature'] = unique_values.index
fig = px.bar(unique_values, x='Feature', y='# Unique Values', color='# Unique Values', height=350)
st.plotly_chart(fig, use_container_width=True)
# INPUT
col1, col2 = st.columns(2)
with col1:
enc_feat = st.multiselect("Select Features", df.select_dtypes(include='object').columns.tolist(), key='encoding_feat', help="Select the categorical features to encode.")
with col2:
encoding = st.selectbox("Select Encoding", ["Select", "Ordinal Encoding", "One Hot Encoding", "Count Frequency Encoding"], key='encoding', help="Select the encoding method.")
if enc_feat and encoding != "Select":
new_line()
col1, col2, col3 = st.columns([1,0.5,1])
if col2.button("Apply", key='encoding_apply',use_container_width=True ,help="Click to apply encoding."):
progress_bar()
# Ordinal Encoding
new_line()
if encoding == "Ordinal Encoding":
st.session_state.all_the_process += f"""
# Ordinal Encoding
from sklearn.preprocessing import OrdinalEncoder
encoder = OrdinalEncoder()
cat_cols = {enc_feat}
df[cat_cols] = encoder.fit_transform(df[cat_cols])
\n """
from sklearn.preprocessing import OrdinalEncoder
encoder = OrdinalEncoder()
cat_cols = enc_feat
df[cat_cols] = encoder.fit_transform(df[cat_cols])
st.session_state['df'] = df
st.success(f"The Categories of the features **`{enc_feat}`** have been encoded using Ordinal Encoding.")
# One Hot Encoding
elif encoding == "One Hot Encoding":
st.session_state.all_the_process += f"""
# One Hot Encoding
df = pd.get_dummies(df, columns={enc_feat})
\n """
df = pd.get_dummies(df, columns=enc_feat)
st.session_state['df'] = df
st.success(f"The Categories of the features **`{enc_feat}`** have been encoded using One Hot Encoding.")
# Count Frequency Encoding
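# Each category is mapped to its relative frequency, e.g. for
# ['a','b','c','b','a'] -> a: 0.4, b: 0.4, c: 0.2 (matching the example above).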
elif encoding == "Count Frequency Encoding":
st.session_state.all_the_process += f"""
# Count Frequency Encoding
df[{enc_feat}] = df[{enc_feat}].apply(lambda x: x.map(len(df) / x.value_counts()))
\n """
df[enc_feat] = df[enc_feat].apply(lambda x: x.map(len(df) / x.value_counts()))
st.session_state['df'] = df
st.success(f"The Categories of the features **`{enc_feat}`** have been encoded using Count Frequency Encoding.")
# Show DataFrame Button
# new_line()
col1, col2, col3 = st.columns([0.15,1,0.15])
col2.divider()
col1, col2, col3 = st.columns([1, 0.7, 1])
with col2:
show_df = st.button("Show DataFrame", key="cat_show_df", help="Click to show the DataFrame.")
if show_df:
st.dataframe(df, use_container_width=True)
# Scaling
new_line()
st.markdown("### βš–οΈ Scaling", unsafe_allow_html=True)
new_line()
with st.expander("Show Scaling"):
new_line()
# Scaling Methods
scaling_methods = st.checkbox("Explain Scaling Methods", value=False, key='scaling_methods')
if scaling_methods:
new_line()
col1, col2, col3 = st.columns(3)
with col1:
st.markdown("<h6 align='center'> Standard Scaling </h6>" ,unsafe_allow_html=True)
st.latex(r'''z = \frac{x - \mu}{\sigma}''')
new_line()
# Values Ranges for the output of Standard Scaling in general
st.latex(r'''z \in [-3,3]''')
with col2:
st.markdown("<h6 align='center'> MinMax Scaling </h6>", unsafe_allow_html=True)
st.latex(r'''z = \frac{x - min(x)}{max(x) - min(x)}''')
new_line()
# Values Ranges for the output of MinMax Scaling in general
st.latex(r'''z \in [0,1]''')
with col3:
st.markdown("<h6 align='center'> Robust Scaling </h6>", unsafe_allow_html=True)
st.latex(r'''z = \frac{x - \mathrm{median}(x)}{Q_3 - Q_1}''')
# Values Ranges for the output of Robust Scaling in general
new_line()
st.latex(r'''z \in [-2,2]''')
# Legend for the symbol used in the formulas above
st.latex(r'''z = \text{the scaled value}''')
new_line()
# Ranges for the numeric features
feat_range = st.checkbox("Further Analysis", value=False, key='feat_range')
if feat_range:
new_line()
st.write("The Ranges for the numeric features:")
col1, col2, col3 = st.columns([0.05,1, 0.05])
with col2:
st.dataframe(df.describe().T, width=700)
new_line()
# INPUT
new_line()
new_line()
col1, col2 = st.columns(2)
with col1:
scale_feat = st.multiselect("Select Features", df.select_dtypes(include=np.number).columns.tolist(), help="Select the features to be scaled.")
with col2:
scaling = st.selectbox("Select Scaling", ["Select", "Standard Scaling", "MinMax Scaling", "Robust Scaling"], help="Select the scaling method.")
if scale_feat and scaling != "Select":
new_line()
col1, col2, col3 = st.columns([1, 0.5, 1])
if col2.button("Apply", key='scaling_apply',use_container_width=True ,help="Click to apply scaling."):
progress_bar()
# Standard Scaling
if scaling == "Standard Scaling":
st.session_state.all_the_process += f"""
# Standard Scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df[{scale_feat}] = pd.DataFrame(scaler.fit_transform(df[{scale_feat}]), columns=df[{scale_feat}].columns)
\n """
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df[scale_feat] = pd.DataFrame(scaler.fit_transform(df[scale_feat]), columns=df[scale_feat].columns)
st.session_state['df'] = df
st.success(f"The Features **`{scale_feat}`** have been scaled using Standard Scaling.")
# MinMax Scaling
elif scaling == "MinMax Scaling":
st.session_state.all_the_process += f"""
# MinMax Scaling
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df[{scale_feat}] = pd.DataFrame(scaler.fit_transform(df[{scale_feat}]), columns=df[{scale_feat}].columns)
\n """
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df[scale_feat] = pd.DataFrame(scaler.fit_transform(df[scale_feat]), columns=df[scale_feat].columns)
st.session_state['df'] = df
st.success(f"The Features **`{scale_feat}`** have been scaled using MinMax Scaling.")
# Robust Scaling
elif scaling == "Robust Scaling":
st.session_state.all_the_process += f"""
# Robust Scaling
from sklearn.preprocessing import RobustScaler
scaler = RobustScaler()
df[{scale_feat}] = pd.DataFrame(scaler.fit_transform(df[{scale_feat}]), columns=df[{scale_feat}].columns)
\n """
from sklearn.preprocessing import RobustScaler
scaler = RobustScaler()
df[scale_feat] = pd.DataFrame(scaler.fit_transform(df[scale_feat]), columns=df[scale_feat].columns)
st.session_state['df'] = df
st.success(f"The Features **`{scale_feat}`** have been scaled using Robust Scaling.")
# Show DataFrame Button
col1, col2, col3 = st.columns([0.15,1,0.15])
col2.divider()
col1, col2, col3 = st.columns([0.9, 0.6, 1])
with col2:
show_df = st.button("Show DataFrame", key="scaling_show_df", help="Click to show the DataFrame.")
if show_df:
st.dataframe(df, use_container_width=True)
# Data Transformation
new_line()
st.markdown("### 🧬 Data Transformation", unsafe_allow_html=True)
new_line()
with st.expander("Show Data Transformation"):
new_line()
# Transformation Methods
trans_methods = st.checkbox("Explain Transformation Methods", key="trans_methods", value=False)
if trans_methods:
new_line()
col1, col2, col3, col4 = st.columns(4)
with col1:
st.markdown("<h6 align='center'> Log <br> Transformation</h6>", unsafe_allow_html=True)
st.latex(r'''z = log(x)''')
with col2:
st.markdown("<h6 align='center'> Square Root Transformation </h6>", unsafe_allow_html=True)
st.latex(r'''z = \sqrt{x}''')
with col3:
st.markdown("<h6 align='center'> Cube Root Transformation </h6>", unsafe_allow_html=True)
st.latex(r'''z = \sqrt[3]{x}''')
with col4:
st.markdown("<h6 align='center'> Exponential Transformation </h6>", unsafe_allow_html=True)
st.latex(r'''z = e^x''')
# INPUT
new_line()
col1, col2 = st.columns(2)
with col1:
trans_feat = st.multiselect("Select Features", df.select_dtypes(include=np.number).columns.tolist(), help="Select the features you want to transform.", key="transformation features")
with col2:
trans = st.selectbox("Select Transformation", ["Select", "Log Transformation", "Square Root Transformation", "Cube Root Transformation", "Exponential Transformation"],
help="Select the transformation you want to apply.",
key= "transformation")
if trans_feat and trans != "Select":
new_line()
col1, col2, col3 = st.columns([1, 0.5, 1])
if col2.button("Apply", key='trans_apply',use_container_width=True ,help="Click to apply transformation."):
progress_bar()
# new_line()
# Log Transformation
if trans == "Log Transformation":
st.session_state.all_the_process += f"""
#Log Transformation
df[{trans_feat}] = np.log1p(df[{trans_feat}])
\n """
df[trans_feat] = np.log1p(df[trans_feat])
st.session_state['df'] = df
st.success("Numerical features have been transformed using Log Transformation.")
# Square Root Transformation
elif trans == "Square Root Transformation":
st.session_state.all_the_process += f"""
#Square Root Transformation
df[{trans_feat}] = np.sqrt(df[{trans_feat}])
\n """
df[trans_feat] = np.sqrt(df[trans_feat])
st.session_state['df'] = df
st.success("Numerical features have been transformed using Square Root Transformation.")
# Cube Root Transformation
elif trans == "Cube Root Transformation":
st.session_state.all_the_process += f"""
#Cube Root Transformation
df[{trans_feat}] = np.cbrt(df[{trans_feat}])
\n """
df[trans_feat] = np.cbrt(df[trans_feat])
st.session_state['df'] = df
st.success("Numerical features have been transformed using Cube Root Transformation.")
# Exponential Transformation
elif trans == "Exponential Transformation":
st.session_state.all_the_process += f"""
#Exponential Transformation
df[{trans_feat}] = np.exp(df[{trans_feat}])
\n """
df[trans_feat] = np.exp(df[trans_feat])
st.session_state['df'] = df
st.success("Numerical features have been transformed using Exponential Transformation.")
# Show DataFrame Button
# new_line()
col1, col2, col3 = st.columns([0.15,1,0.15])
col2.divider()
col1, col2, col3 = st.columns([0.9, 0.6, 1])
with col2:
show_df = st.button("Show DataFrame", key="trans_show_df", help="Click to show the DataFrame.")
if show_df:
st.dataframe(df, use_container_width=True)
# Feature Engineering
new_line()
st.markdown("### ⚑ Feature Engineering", unsafe_allow_html=True)
new_line()
with st.expander("Show Feature Engineering"):
# Feature Extraction
new_line()
st.markdown("#### Feature Extraction", unsafe_allow_html=True)
new_line()
col1, col2, col3 = st.columns(3)
with col1:
feat1 = st.selectbox("First Feature/s", ["Select"] + df.select_dtypes(include=np.number).columns.tolist(), key="feat_ex1", help="Select the first feature/s you want to extract.")
with col2:
op = st.selectbox("Mathematical Operation", ["Select", "Addition +", "Subtraction -", "Multiplication *", "Division /"], key="feat_ex_op", help="Select the mathematical operation you want to apply.")
with col3:
feat2 = st.selectbox("Second Feature/s",["Select"] + df.select_dtypes(include=np.number).columns.tolist(), key="feat_ex2", help="Select the second feature/s you want to extract.")
if feat1 and op != "Select" and feat2:
col1, col2, col3 = st.columns(3)
with col2:
feat_name = st.text_input("Feature Name", key="feat_name", help="Enter the name of the new feature.")
col1, col2, col3 = st.columns([1, 0.6, 1])
new_line()
if col2.button("Extract Feature"):
if feat_name == "":
feat_name = f"({feat1} {op} {feat2})"
if op == "Addition +":
st.session_state.all_the_process += f"""
# Feature Extraction - Addition
df[{feat_name}] = df[{feat1}] + df[{feat2}]
\n """
df[feat_name] = df[feat1] + df[feat2]
st.session_state['df'] = df
st.success(f"Feature '**_{feat_name}_**' has been extracted using Addition.")
elif op == "Subtraction -":
st.session_state.all_the_process += f"""
# Feature Extraction - Subtraction
df[{feat_name}] = df[{feat1}] - df[{feat2}]
\n """
df[feat_name] = df[feat1] - df[feat2]
st.session_state['df'] = df
st.success(f"Feature {feat_name} has been extracted using Subtraction.")
elif op == "Multiplication *":
st.session_state.all_the_process += f"""
# Feature Extraction - Multiplication
df[{feat_name}] = df[{feat1}] * df[{feat2}]
\n """
df[feat_name] = df[feat1] * df[feat2]
st.session_state['df'] = df
st.success(f"Feature {feat_name} has been extracted using Multiplication.")
elif op == "Division /":
st.session_state.all_the_process += f"""
# Feature Extraction - Division
df[{feat_name}] = df[{feat1}] / df[{feat2}]
\n """
df[feat_name] = df[feat1[0]] / df[feat2[0]]
st.session_state['df'] = df
st.success(f"Feature {feat_name} has been extracted using Division.")
# Feature Transformation
st.divider()
st.markdown("#### Feature Transformation", unsafe_allow_html=True)
new_line()
col1, col2, col3 = st.columns(3)
with col1:
feat_trans = st.multiselect("Select Feature/s", df.select_dtypes(include=np.number).columns.tolist(), help="Select the Features you want to Apply transformation operation on it")
with col2:
op = st.selectbox("Select Operation", ["Select", "Addition +", "Subtraction -", "Multiplication *", "Division /", ], key='feat_trans_op', help="Select the operation you want to apply on the feature")
with col3:
value = st.text_input("Enter Value", key='feat_trans_val', help="Enter the value you want to apply the operation on it")
if op != "Select" and value != "":
new_line()
col1, col2, col3 = st.columns([1, 0.7, 1])
if col2.button("Transform Feature"):
if op == "Addition +":
st.session_state.all_the_process += f"""
# Feature Transformation - Addition
df[{feat_trans}] = df[{feat_trans}] + {value}
\n """
df[feat_trans] = df[feat_trans] + float(value)
st.session_state['df'] = df
st.success(f"The Features **`{feat_trans}`** have been transformed using Addition with the value **`{value}`**.")
elif op == "Subtraction -":
st.session_state.all_the_process += f"""
# Feature Transformation - Subtraction
df[{feat_trans}] = df[{feat_trans}] - {value}
\n """
df[feat_trans] = df[feat_trans] - float(value)
st.session_state['df'] = df
st.success(f"The Features **`{feat_trans}`** have been transformed using Subtraction with the value **`{value}`**.")
elif op == "Multiplication *":
st.session_state.all_the_process += f"""
# Feature Transformation - Multiplication
df[{feat_trans}] = df[{feat_trans}] * {value}
\n """
df[feat_trans] = df[feat_trans] * float(value)
st.session_state['df'] = df
st.success(f"The Features **`{feat_trans}`** have been transformed using Multiplication with the value **`{value}`**.")
elif op == "Division /":
st.session_state.all_the_process += f"""
# Feature Transformation - Division
df[{feat_trans}] = df[{feat_trans}] / {value}
\n """
df[feat_trans] = df[feat_trans] / float(value)
st.session_state['df'] = df
st.success(f"The Featueres **`{feat_trans}`** have been transformed using Division with the value **`{value}`**.")
# Feature Selection
st.divider()
st.markdown("#### Feature Selection", unsafe_allow_html=True)
new_line()
feat_sel = st.multiselect("Select Feature/s", df.columns.tolist(), key='feat_sel', help="Select the Features you want to keep in the dataset")
new_line()
if feat_sel:
col1, col2, col3 = st.columns([1, 0.7, 1])
if col2.button("Select Features"):
st.session_state.all_the_process += f"""
# Feature Selection\ndf = df[{feat_sel}]
\n """
progress_bar()
new_line()
df = df[feat_sel]
st.session_state['df'] = df
st.success(f"The Features **`{feat_sel}`** have been selected.")
# Show DataFrame Button
col1, col2, col3 = st.columns([0.15,1,0.15])
col2.divider()
col1, col2, col3 = st.columns([0.9, 0.6, 1])
with col2:
show_df = st.button("Show DataFrame", key="feat_eng_show_df", help="Click to show the DataFrame.")
if show_df:
st.dataframe(df, use_container_width=True)
# Data Splitting
st.markdown("### πŸͺš Data Splitting", unsafe_allow_html=True)
new_line()
with st.expander("Show Data Splitting"):
new_line()
train_size, val_size, test_size = 0,0,0
col1, col2 = st.columns(2)
with col1:
target = st.selectbox("Select Target Variable", df.columns.tolist(), key='target', help="Target Variable is the variable that you want to predict.")
st.session_state['target_variable'] = target
with col2:
sets = st.selectbox("Select The Split Sets", ["Select", "Train and Test", "Train, Validation, and Test"], key='sets', help="Train Set is the data used to train the model. Validation Set is the data used to validate the model. Test Set is the data used to test the model. ")
st.session_state['split_sets'] = sets
if sets != "Select" and target:
if sets == "Train, Validation, and Test" :
new_line()
col1, col2, col3 = st.columns(3)
with col1:
train_size = st.number_input("Train Size", min_value=0.0, max_value=1.0, value=0.7, step=0.05, key='train_size')
train_size = round(train_size, 2)
with col2:
val_size = st.number_input("Validation Size", min_value=0.0, max_value=1.0, value=0.15, step=0.05, key='val_size')
val_size = round(val_size, 2)
with col3:
test_size = st.number_input("Test Size", min_value=0.0, max_value=1.0, value=0.15, step=0.05, key='test_size')
test_size = round(test_size, 2)
if round(train_size + val_size + test_size, 2) != 1.0:
new_line()
st.error(f"The sum of Train, Validation, and Test sizes must be equal to 1.0, your sum is: **train** + **validation** + **test** = **{train_size}** + **{val_size}** + **{test_size}** = **{sum([train_size, val_size, test_size])}**" )
new_line()
else:
split_button = ""
col1, col2, col3 = st.columns([1, 0.5, 1])
with col2:
new_line()
split_button = st.button("Split Data", use_container_width=True)
if split_button:
st.session_state.all_the_process += f"""
# Data Splitting
from sklearn.model_selection import train_test_split
X_train, X_rem, y_train, y_rem = train_test_split(df.drop('{target}', axis=1), df['{target}'], train_size={train_size}, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, train_size= {val_size} / (1.0 - {train_size}),random_state=42)
\n """
from sklearn.model_selection import train_test_split
X_train, X_rem, y_train, y_rem = train_test_split(df.drop(target, axis=1), df[target], train_size=train_size, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, train_size= val_size / (1.0 - train_size),random_state=42)
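# The second split takes val_size / (1 - train_size) of the holdout, so the
# final proportions match the request, e.g. 0.7/0.15/0.15 -> 0.15/0.3 = 50% of the holdout.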
st.session_state['X_train'] = X_train
st.session_state['X_val'] = X_val
st.session_state['X_test'] = X_test
st.session_state['y_train'] = y_train
st.session_state['y_val'] = y_val
st.session_state['y_test'] = y_test
col1, col2, col3 = st.columns(3)
if split_button:
st.success("Data Splitting Done!")
with col1:
st.write("Train Set")
st.write("X Train Shape: ", X_train.shape)
st.write("Y Train Shape: ", y_train.shape)
train = pd.concat([X_train, y_train], axis=1)
train_csv = train.to_csv(index=False).encode('utf-8')
st.download_button("Download Train Set", train_csv, "train.csv", "text/csv", key='train3')
with col2:
st.write("Validation Set")
st.write("X Validation Shape: ", X_val.shape)
st.write("Y Validation Shape: ", y_val.shape)
val = pd.concat([X_val, y_val], axis=1)
val_csv = val.to_csv(index=False).encode('utf-8')
st.download_button("Download Validation Set", val_csv, "validation.csv", key='val3')
with col3:
st.write("Test Set")
st.write("X Test Shape: ", X_test.shape)
st.write("Y Test Shape: ", y_test.shape)
test = pd.concat([X_test, y_test], axis=1)
test_csv = test.to_csv(index=False).encode('utf-8')
st.download_button("Download Test Set", test_csv, "test.csv", key='test3')
elif sets == "Train and Test":
new_line()
col1, col2 = st.columns(2)
with col1:
train_size = st.number_input("Train Size", min_value=0.0, max_value=1.0, value=0.7, step=0.05, key='train_size')
train_size = round(train_size, 2)
with col2:
test_size = st.number_input("Test Size", min_value=0.0, max_value=1.0, value=0.30, step=0.05, key='test_size')
test_size = round(test_size, 2)
if round(train_size + test_size, 2) != 1.0:
new_line()
st.error(f"The sum of Train, Validation, and Test sizes must be equal to 1.0, your sum is: **train** + **test** = **{train_size}** + **{test_size}** = **{sum([train_size, test_size])}**" )
new_line()
else:
split_button = ""
col1, col2, col3 = st.columns([1, 0.5, 1])
with col2:
new_line()
split_button = st.button("Split Data")
if split_button:
st.session_state.all_the_process += f"""
# Data Splitting
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df.drop('{target}', axis=1), df['{target}'], train_size={train_size}, random_state=42)
\n """
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df.drop(target, axis=1), df[target], train_size=train_size, random_state=42)
st.session_state['X_train'] = X_train
st.session_state['X_test'] = X_test
st.session_state['y_train'] = y_train
st.session_state['y_test'] = y_test
col1, col2 = st.columns(2)
if split_button:
st.success("Data Splitting Done!")
with col1:
st.write("Train Set")
st.write("X Train Shape: ", X_train.shape)
st.write("Y Train Shape: ", y_train.shape)
train = pd.concat([X_train, y_train], axis=1)
train_csv = train.to_csv(index=False).encode('utf-8')
st.download_button("Download Train Set", train_csv, "train.csv", key='train2')
with col2:
st.write("Test Set")
st.write("X test Shape: ", X_test.shape)
st.write("Y test Shape: ", y_test.shape)
test = pd.concat([X_test, y_test], axis=1)
test_csv = test.to_csv(index=False).encode('utf-8')
st.download_button("Download Test Set", test_csv, "test.csv", key='test2')
# Building the model
new_line()
st.markdown("### πŸ€– Building the Model")
new_line()
problem_type = ""
with st.expander(" Model Building"):
target, problem_type, model = "", "", ""
col1, col2, col3 = st.columns(3)
with col1:
target = st.selectbox("Target Variable", [st.session_state['target_variable']] , key='target_ml', help="The target variable is the variable that you want to predict")
new_line()
with col2:
problem_type = st.selectbox("Problem Type", ["Select", "Classification", "Regression"], key='problem_type', help="The problem type is the type of problem that you want to solve")
with col3:
if problem_type == "Classification":
model = st.selectbox("Model", ["Select", "Logistic Regression", "K-Nearest Neighbors", "Support Vector Machine", "Decision Tree", "Random Forest", "XGBoost", "LightGBM", "CatBoost"],
key='model', help="The model is the algorithm that you want to use to solve the problem")
new_line()
elif problem_type == "Regression":
model = st.selectbox("Model", ["Linear Regression", "K-Nearest Neighbors", "Support Vector Machine", "Decision Tree", "Random Forest", "XGBoost", "LightGBM", "CatBoost"],
key='model', help="The model is the algorithm that you want to use to solve the problem")
new_line()
if target != "Select" and problem_type and model:
if problem_type == "Classification":
if model == "Logistic Regression":
col1, col2, col3 = st.columns(3)
with col1:
penalty = st.selectbox("Penalty (Optional)", ["l2", "l1", "none", "elasticnet"], key='penalty')
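# Note: the string 'none' for penalty is deprecated in recent scikit-learn;
# newer versions expect penalty=None, so the selection may need mapping before fitting.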
with col2:
solver = st.selectbox("Solver (Optional)", ["lbfgs", "newton-cg", "liblinear", "sag", "saga"], key='solver')
with col3:
C = st.number_input("C (Optional)", min_value=0.0, max_value=1.0, value=1.0, step=0.05, key='C')
col1, col2, col3 = st.columns([1,1,1])
if col2.button("Train Model", use_container_width=True):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> Logistic Regression
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(penalty='{penalty}', solver='{solver}', C={C}, random_state=42)
model.fit(X_train, y_train)
\n """
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(penalty=penalty, solver=solver, C=C, random_state=42)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
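# joblib serializes the fitted estimator to disk; the bytes are read back
# below and offered to the user through st.download_button.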
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "K-Nearest Neighbors":
col1, col2, col3 = st.columns(3)
with col1:
n_neighbors = st.number_input("N Neighbors **Required**", min_value=1, max_value=100, value=5, step=1, key='n_neighbors')
with col2:
weights = st.selectbox("Weights (Optional)", ["uniform", "distance"], key='weights')
with col3:
algorithm = st.selectbox("Algorithm (Optional)", ["auto", "ball_tree", "kd_tree", "brute"], key='algorithm')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model", use_container_width=True):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> K-Nearest Neighbors
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors={n_neighbors}, weights='{weights}', algorithm='{algorithm}')
model.fit(X_train, y_train)
\n """
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "Support Vector Machine":
col1, col2, col3 = st.columns(3)
with col1:
kernel = st.selectbox("Kernel (Optional)", ["rbf", "poly", "linear", "sigmoid", "precomputed"], key='kernel')
with col2:
degree = st.number_input("Degree (Optional)", min_value=1, max_value=100, value=3, step=1, key='degree')
with col3:
C = st.number_input("C (Optional)", min_value=0.0, max_value=1.0, value=1.0, step=0.05, key='C')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model", use_container_width=True):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> Support Vector Machine
from sklearn.svm import SVC
model = SVC(kernel='{kernel}', degree={degree}, C={C}, random_state=42)
model.fit(X_train, y_train)
\n """
from sklearn.svm import SVC
model = SVC(kernel=kernel, degree=degree, C=C, random_state=42)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "Decision Tree":
col1, col2, col3 = st.columns(3)
with col1:
criterion = st.selectbox("Criterion (Optional)", ["gini", "entropy", "log_loss"], key='criterion')
with col2:
splitter = st.selectbox("Splitter (Optional)", ["best", "random"], key='splitter')
with col3:
min_samples_split = st.number_input("Min Samples Split (Optional)", min_value=2, max_value=100, value=2, step=1, key='min_samples_split')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model", use_container_width=True):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> Decision Tree
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(criterion='{criterion}', splitter='{splitter}', min_samples_split={min_samples_split}, random_state=42)
model.fit(X_train, y_train)
\n """
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(criterion=criterion, splitter=splitter, min_samples_split=min_samples_split, random_state=42)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "Random Forest":
col1, col2, col3 = st.columns(3)
with col1:
n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=5, key='n_estimators')
with col2:
criterion = st.selectbox("Criterion (Optional)", ["gini", "entropy", "log_loss"], key='criterion')
with col3:
min_samples_split = st.number_input("Min Samples Split (Optional)", min_value=2, max_value=100, value=2, step=1, key='min_samples_split')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model", use_container_width=True):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> Random Forest
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators={n_estimators}, criterion='{criterion}', min_samples_split={min_samples_split}, random_state=42)
model.fit(X_train, y_train)
\n """
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion, min_samples_split=min_samples_split, random_state=42)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "XGBoost":
col1, col2, col3 = st.columns(3)
with col1:
n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=5, key='n_estimators')
with col2:
learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.0, max_value=1.0, value=0.1, step=0.05, key='learning_rate')
with col3:
booster = st.selectbox("Booster (Optional)", ["gbtree", "gblinear", "dart"], key='booster')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> XGBoost
from xgboost import XGBClassifier
model = XGBClassifier(n_estimators={n_estimators}, learning_rate={learning_rate}, booster='{booster}', random_state=42)
model.fit(X_train, y_train)
\n """
from xgboost import XGBClassifier
model = XGBClassifier(n_estimators=n_estimators, learning_rate=learning_rate, booster=booster, random_state=42)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == 'LightGBM':
col1, col2, col3 = st.columns(3)
with col1:
n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=5, key='n_estimators')
with col2:
learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.0, max_value=1.0, value=0.1, step=0.05, key='learning_rate')
with col3:
boosting_type = st.selectbox("Boosting Type (Optional)", ["gbdt", "dart", "goss", "rf"], key='boosting_type')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> LightGBM
from lightgbm import LGBMClassifier
model = LGBMClassifier(n_estimators={n_estimators}, learning_rate={learning_rate}, boosting_type='{boosting_type}', random_state=42)
model.fit(X_train, y_train)
\n """
from lightgbm import LGBMClassifier
model = LGBMClassifier(n_estimators=n_estimators, learning_rate=learning_rate, boosting_type=boosting_type, random_state=42)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == 'CatBoost':
col1, col2, col3 = st.columns(3)
with col1:
n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=5, key='n_estimators')
with col2:
learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.0, max_value=1.0, value=0.1, step=0.05, key='learning_rate')
with col3:
boosting_type = st.selectbox("Boosting Type (Optional)", ["Ordered", "Plain"], key='boosting_type')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> CatBoost
from catboost import CatBoostClassifier
model = CatBoostClassifier(n_estimators={n_estimators}, learning_rate={learning_rate}, boosting_type='{boosting_type}', random_state=42)
model.fit(X_train, y_train)
\n """
from catboost import CatBoostClassifier
model = CatBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate, boosting_type=boosting_type, random_state=42)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
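# The regression branches below mirror the classification flow above, swapping
# each estimator for its regressor counterpart (e.g. RandomForestRegressor in
# place of RandomForestClassifier) and exposing the analogous hyperparameters.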
if problem_type == "Regression":
if model == "Linear Regression":
col1, col2, col3 = st.columns(3)
with col1:
fit_intercept = st.selectbox("Fit Intercept (Optional)", [True, False], key='fit_intercept')
with col2:
positive = st.selectbox("Positve (Optional)", [True, False], key='positive')
with col3:
copy_x = st.selectbox("Copy X (Optional)", [True, False], key='copy_x')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> Linear Regression
from sklearn.linear_model import LinearRegression
model = LinearRegression(fit_intercept={fit_intercept}, positive={positive}, copy_X={copy_x})
model.fit(X_train, y_train)
\n """
from sklearn.linear_model import LinearRegression
model = LinearRegression(fit_intercept=fit_intercept, positive=positive, copy_X=copy_x)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "K-Nearest Neighbors":
col1, col2, col3 = st.columns(3)
with col1:
n_neighbors = st.number_input("N Neighbors (Optional)", min_value=1, max_value=100, value=5, step=1, key='n_neighbors')
with col2:
weights = st.selectbox("Weights (Optional)", ["uniform", "distance"], key='weights')
with col3:
algorithm = st.selectbox("Algorithm (Optional)", ["auto", "ball_tree", "kd_tree", "brute"], key='algorithm')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> K-Nearest Neighbors
from sklearn.neighbors import KNeighborsRegressor
model = KNeighborsRegressor(n_neighbors={n_neighbors}, weights='{weights}', algorithm='{algorithm}')
model.fit(X_train, y_train)
\n """
from sklearn.neighbors import KNeighborsRegressor
model = KNeighborsRegressor(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "Support Vector Machine":
col1, col2, col3 = st.columns(3)
with col1:
kernel = st.selectbox("Kernel (Optional)", ["linear", "poly", "rbf", "sigmoid", "precomputed"], key='kernel')
with col2:
degree = st.number_input("Degree (Optional)", min_value=1, max_value=10, value=3, step=1, key='degree')
with col3:
gamma = st.selectbox("Gamma (Optional)", ["scale", "auto"], key='gamma')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> Support Vector Machine
from sklearn.svm import SVR
model = SVR(kernel='{kernel}', degree={degree}, gamma='{gamma}')
model.fit(X_train, y_train)
\n """
from sklearn.svm import SVR
model = SVR(kernel=kernel, degree=degree, gamma=gamma)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "Decision Tree":
col1, col2, col3 = st.columns(3)
with col1:
criterion = st.selectbox("Criterion (Optional)", ["squared_error", "friedman_mse", "absolute_error", "poisson"], key='criterion')
with col2:
splitter = st.selectbox("Splitter (Optional)", ["best", "random"], key='splitter')
with col3:
min_samples_split = st.number_input("Min Samples Split (Optional)", min_value=2, max_value=10, value=2, step=1, key='min_samples_split')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> Decision Tree
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor(criterion='{criterion}', splitter='{splitter}', min_samples_split={min_samples_split})
model.fit(X_train, y_train)
\n """
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor(criterion=criterion, splitter=splitter, min_samples_split=min_samples_split)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "Random Forest":
col1, col2, col3 = st.columns(3)
with col1:
n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=1, key='n_estimators')
with col2:
criterion = st.selectbox("Criterion (Optional)", ["squared_error", "friedman_mse", "absolute_error", "poisson"], key='criterion')
with col3:
min_samples_split = st.number_input("Min Samples Split (Optional)", min_value=2, max_value=10, value=2, step=1, key='min_samples_split')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> Random Forest
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators={n_estimators}, criterion='{criterion}', min_samples_split={min_samples_split})
model.fit(X_train, y_train)
\n """
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators=n_estimators, criterion=criterion, min_samples_split=min_samples_split)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "XGBoost":
col1, col2, col3 = st.columns(3)
with col1:
n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=1, key='n_estimators')
with col2:
learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.0001, max_value=1.0, value=0.1, step=0.1, key='learning_rate')
with col3:
booster = st.selectbox("Booster (Optional)", ["gbtree", "gblinear", "dart"], key='booster')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> XGBoost
from xgboost import XGBRegressor
model = XGBRegressor(n_estimators={n_estimators}, learning_rate={learning_rate}, booster='{booster}')
model.fit(X_train, y_train)
\n """
from xgboost import XGBRegressor
model = XGBRegressor(n_estimators=n_estimators, learning_rate=learning_rate, booster=booster)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "LightGBM":
col1, col2, col3 = st.columns(3)
with col1:
n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=1, key='n_estimators')
with col2:
learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.1, max_value=1.0, value=0.1, step=0.1, key='learning_rate')
with col3:
boosting_type = st.selectbox("Boosting Type (Optional)", ["gbdt", "dart", "goss", "rf"], key='boosting_type')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> LightGBM
from lightgbm import LGBMRegressor
model = LGBMRegressor(n_estimators={n_estimators}, learning_rate={learning_rate}, boosting_type='{boosting_type}')
model.fit(X_train, y_train)
\n """
from lightgbm import LGBMRegressor
model = LGBMRegressor(n_estimators=n_estimators, learning_rate=learning_rate, boosting_type=boosting_type)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
if model == "CatBoost":
col1, col2, col3 = st.columns(3)
with col1:
n_estimators = st.number_input("N Estimators (Optional)", min_value=1, max_value=1000, value=100, step=1, key='n_estimators')
with col2:
learning_rate = st.number_input("Learning Rate (Optional)", min_value=0.1, max_value=1.0, value=0.1, step=0.1, key='learning_rate')
with col3:
boosting_type = st.selectbox("Boosting Type (Optional)", ["Ordered", "Plain"], key='boosting_type')
col1, col2, col3 = st.columns([1,0.7,1])
if col2.button("Train Model"):
progress_bar()
st.session_state['trained_model_bool'] = True
# Train the model
st.session_state.all_the_process += f"""
# Model Building --> CatBoost
from catboost import CatBoostRegressor
model = CatBoostRegressor(n_estimators={n_estimators}, learning_rate={learning_rate}, boosting_type='{boosting_type}')
model.fit(X_train, y_train)
\n """
from catboost import CatBoostRegressor
model = CatBoostRegressor(n_estimators=n_estimators, learning_rate=learning_rate, boosting_type=boosting_type)
model.fit(X_train, y_train)
st.session_state['trained_model'] = model
st.success("Model Trained Successfully!")
# save the model
import joblib
joblib.dump(model, 'model.pkl')
# Download the model
model_file = open("model.pkl", "rb")
model_bytes = model_file.read()
col2.download_button("Download Model", model_bytes, "model.pkl", use_container_width=True, key='save_model')
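# Each branch above also appended the equivalent standalone training code to
# st.session_state.all_the_process; the "Code" button at the bottom of the page
# renders that accumulated script so the whole pipeline can be reproduced
# outside Streamlit.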
# Evaluation
if st.session_state['trained_model_bool']:
st.markdown("### πŸ“ˆ Evaluation")
new_line()
with st.expander("Model Evaluation"):
# Load the model
import joblib
model = joblib.load('model.pkl')
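# Reloading from model.pkl (rather than reading st.session_state['trained_model'])
# presumably guarantees the evaluation always runs against the most recently
# saved model, even across Streamlit reruns.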
if str(model) not in st.session_state.lst_models_predctions:
st.session_state.lst_models_predctions.append(str(model))
st.session_state.lst_models.append(str(model))
if str(model) not in st.session_state.models_with_eval.keys():
st.session_state.models_with_eval[str(model)] = []
# Predictions
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state.all_the_process += f"""
# Predictions
y_pred_train = model.predict(X_train)
y_pred_val = model.predict(X_val)
y_pred_test = model.predict(X_test)
\n """
y_pred_train = model.predict(X_train)
st.session_state.y_pred_train = y_pred_train
y_pred_val = model.predict(X_val)
st.session_state.y_pred_val = y_pred_val
y_pred_test = model.predict(X_test)
st.session_state.y_pred_test = y_pred_test
elif st.session_state["split_sets"] == "Train and Test":
st.session_state.all_the_process += f"""
# Predictions
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)
\n """
y_pred_train = model.predict(X_train)
st.session_state.y_pred_train = y_pred_train
y_pred_test = model.predict(X_test)
st.session_state.y_pred_test = y_pred_test
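# Predictions for each split are also mirrored into session state so other parts
# of the app can reuse them after a rerun without calling predict again.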
# Choose Evaluation Metric
if st.session_state['problem_type'] == "Classification":
evaluation_metric = st.multiselect("Evaluation Metric", ["Accuracy", "Precision", "Recall", "F1 Score", "AUC Score"], key='evaluation_metric')
elif st.session_state['problem_type'] == "Regression":
evaluation_metric = st.multiselect("Evaluation Metric", ["Mean Absolute Error (MAE)", "Mean Squared Error (MSE)", "Root Mean Squared Error (RMSE)", "R2 Score"], key='evaluation_metric')
col1, col2, col3 = st.columns([1, 0.6, 1])
st.session_state.show_eval = True
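# metrics_df (initialised earlier in the script) accumulates one column per
# selected metric; its rows correspond to the data splits, and the index is
# relabelled (Train/Validation/Test or Train/Test) when the table is shown below.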
if evaluation_metric != []:
for metric in evaluation_metric:
if metric == "Accuracy":
# Record this metric for the current model if it is not already tracked
if "Accuracy" not in st.session_state.models_with_eval[str(model)]:
st.session_state.models_with_eval[str(model)].append("Accuracy")
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state.all_the_process += f"""
# Evaluation - Accuracy
from sklearn.metrics import accuracy_score
print("Accuracy Score on Train Set: ", accuracy_score(y_train, y_pred_train))
print("Accuracy Score on Validation Set: ", accuracy_score(y_val, y_pred_val))
print("Accuracy Score on Test Set: ", accuracy_score(y_test, y_pred_test))
\n """
from sklearn.metrics import accuracy_score
train_acc = accuracy_score(y_train, y_pred_train)
val_acc = accuracy_score(y_val, y_pred_val)
test_acc = accuracy_score(y_test, y_pred_test)
metrics_df[metric] = [train_acc, val_acc, test_acc]
st.session_state['metrics_df'] = metrics_df
else:
st.session_state.all_the_process += f"""
# Evaluation - Accuracy
from sklearn.metrics import accuracy_score
print("Accuracy Score on Train Set: ", accuracy_score(y_train, y_pred_train))
print("Accuracy Score on Test Set: ", accuracy_score(y_test, y_pred_test))
\n """
from sklearn.metrics import accuracy_score
train_acc = accuracy_score(y_train, y_pred_train)
test_acc = accuracy_score(y_test, y_pred_test)
metrics_df[metric] = [train_acc, test_acc]
st.session_state['metrics_df'] = metrics_df
elif metric == "Precision":
if "Precision" not in st.session_state.models_with_eval[str(model)]:
st.session_state.models_with_eval[str(model)].append("Precision")
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state.all_the_process += f"""
# Evaluation - Precision
from sklearn.metrics import precision_score
print("Precision Score on Train Set: ", precision_score(y_train, y_pred_train))
print("Precision Score on Validation Set: ", precision_score(y_val, y_pred_val))
print("Precision Score on Test Set: ", precision_score(y_test, y_pred_test))
\n """
from sklearn.metrics import precision_score
train_prec = precision_score(y_train, y_pred_train)
val_prec = precision_score(y_val, y_pred_val)
test_prec = precision_score(y_test, y_pred_test)
metrics_df[metric] = [train_prec, val_prec, test_prec]
st.session_state['metrics_df'] = metrics_df
else:
st.session_state.all_the_process += f"""
# Evaluation - Precision
from sklearn.metrics import precision_score
print("Precision Score on Train Set: ", precision_score(y_train, y_pred_train))
print("Precision Score on Test Set: ", precision_score(y_test, y_pred_test))
\n """
from sklearn.metrics import precision_score
train_prec = precision_score(y_train, y_pred_train)
test_prec = precision_score(y_test, y_pred_test)
metrics_df[metric] = [train_prec, test_prec]
st.session_state['metrics_df'] = metrics_df
elif metric == "Recall":
if "Recall" not in st.session_state.models_with_eval[str(model)]:
st.session_state.models_with_eval[str(model)].append("Recall")
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state.all_the_process += f"""
# Evaluation - Recall
from sklearn.metrics import recall_score
print("Recall Score on Train Set: ", recall_score(y_train, y_pred_train))
print("Recall Score on Validation Set: ", recall_score(y_val, y_pred_val))
print("Recall Score on Test Set: ", recall_score(y_test, y_pred_test))
\n """
from sklearn.metrics import recall_score
train_rec = recall_score(y_train, y_pred_train)
val_rec = recall_score(y_val, y_pred_val)
test_rec = recall_score(y_test, y_pred_test)
metrics_df[metric] = [train_rec, val_rec, test_rec]
st.session_state['metrics_df'] = metrics_df
else:
st.session_state.all_the_process += f"""
# Evaluation - Recall
from sklearn.metrics import recall_score
print("Recall Score on Train Set: ", recall_score(y_train, y_pred_train))
print("Recall Score on Test Set: ", recall_score(y_test, y_pred_test))
\n """
from sklearn.metrics import recall_score
train_rec = recall_score(y_train, y_pred_train)
test_rec = recall_score(y_test, y_pred_test)
metrics_df[metric] = [train_rec, test_rec]
st.session_state['metrics_df'] = metrics_df
elif metric == "F1 Score":
if "F1 Score" not in st.session_state.models_with_eval[str(model)]:
st.session_state.models_with_eval[str(model)].append("F1 Score")
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state.all_the_process += f"""
# Evaluation - F1 Score
from sklearn.metrics import f1_score
print("F1 Score on Train Set: ", f1_score(y_train, y_pred_train))
print("F1 Score on Validation Set: ", f1_score(y_val, y_pred_val))
print("F1 Score on Test Set: ", f1_score(y_test, y_pred_test))
\n """
from sklearn.metrics import f1_score
train_f1 = f1_score(y_train, y_pred_train)
val_f1 = f1_score(y_val, y_pred_val)
test_f1 = f1_score(y_test, y_pred_test)
metrics_df[metric] = [train_f1, val_f1, test_f1]
st.session_state['metrics_df'] = metrics_df
else:
st.session_state.all_the_process += f"""
# Evaluation - F1 Score
from sklearn.metrics import f1_score
print("F1 Score on Train Set: ", f1_score(y_train, y_pred_train))
print("F1 Score on Test Set: ", f1_score(y_test, y_pred_test))
\n """
from sklearn.metrics import f1_score
train_f1 = f1_score(y_train, y_pred_train)
test_f1 = f1_score(y_test, y_pred_test)
metrics_df[metric] = [train_f1, test_f1]
st.session_state['metrics_df'] = metrics_df
elif metric == "AUC Score":
if "AUC Score" not in st.session_state.models_with_eval[str(model)]:
st.session_state.models_with_eval[str(model)].append("AUC Score")
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state.all_the_process += f"""
# Evaluation - AUC Score
from sklearn.metrics import roc_auc_score
print("AUC Score on Train Set: ", roc_auc_score(y_train, y_pred_train))
print("AUC Score on Validation Set: ", roc_auc_score(y_val, y_pred_val))
print("AUC Score on Test Set: ", roc_auc_score(y_test, y_pred_test))
\n """
from sklearn.metrics import roc_auc_score
train_auc = roc_auc_score(y_train, y_pred_train)
val_auc = roc_auc_score(y_val, y_pred_val)
test_auc = roc_auc_score(y_test, y_pred_test)
metrics_df[metric] = [train_auc, val_auc, test_auc]
st.session_state['metrics_df'] = metrics_df
else:
st.session_state.all_the_process += f"""
# Evaluation - AUC Score
from sklearn.metrics import roc_auc_score
print("AUC Score on Train Set: ", roc_auc_score(y_train, y_pred_train))
print("AUC Score on Test Set: ", roc_auc_score(y_test, y_pred_test))
\n """
from sklearn.metrics import roc_auc_score
train_auc = roc_auc_score(y_train, y_pred_train)
test_auc = roc_auc_score(y_test, y_pred_test)
metrics_df[metric] = [train_auc, test_auc]
st.session_state['metrics_df'] = metrics_df
elif metric == "Mean Absolute Error (MAE)":
if "Mean Absolute Error (MAE)" not in st.session_state.models_with_eval[str(model)]:
st.session_state.models_with_eval[str(model)].append("Mean Absolute Error (MAE)")
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state.all_the_process += f"""
# Evaluation - MAE
from sklearn.metrics import mean_absolute_error
print("MAE on Train Set: ", mean_absolute_error(y_train, y_pred_train))
print("MAE on Validation Set: ", mean_absolute_error(y_val, y_pred_val))
print("MAE on Test Set: ", mean_absolute_error(y_test, y_pred_test))
\n """
from sklearn.metrics import mean_absolute_error
train_mae = mean_absolute_error(y_train, y_pred_train)
val_mae = mean_absolute_error(y_val, y_pred_val)
test_mae = mean_absolute_error(y_test, y_pred_test)
metrics_df[metric] = [train_mae, val_mae, test_mae]
st.session_state['metrics_df'] = metrics_df
else:
st.session_state.all_the_process += f"""
# Evaluation - MAE
from sklearn.metrics import mean_absolute_error
print("MAE on Train Set: ", mean_absolute_error(y_train, y_pred_train))
print("MAE on Test Set: ", mean_absolute_error(y_test, y_pred_test))
\n """
from sklearn.metrics import mean_absolute_error
train_mae = mean_absolute_error(y_train, y_pred_train)
test_mae = mean_absolute_error(y_test, y_pred_test)
metrics_df[metric] = [train_mae, test_mae]
st.session_state['metrics_df'] = metrics_df
elif metric == "Mean Squared Error (MSE)":
if "Mean Squared Error (MSE)" not in st.session_state.models_with_eval[str(model)]:
st.session_state.models_with_eval[str(model)].append("Mean Squared Error (MSE)")
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state.all_the_process += f"""
# Evaluation - MSE
from sklearn.metrics import mean_squared_error
print("MSE on Train Set: ", mean_squared_error(y_train, y_pred_train))
print("MSE on Validation Set: ", mean_squared_error(y_val, y_pred_val))
print("MSE on Test Set: ", mean_squared_error(y_test, y_pred_test))
\n """
from sklearn.metrics import mean_squared_error
train_mse = mean_squared_error(y_train, y_pred_train)
val_mse = mean_squared_error(y_val, y_pred_val)
test_mse = mean_squared_error(y_test, y_pred_test)
metrics_df[metric] = [train_mse, val_mse, test_mse]
st.session_state['metrics_df'] = metrics_df
else:
st.session_state.all_the_process += f"""
# Evaluation - MSE
from sklearn.metrics import mean_squared_error
print("MSE on Train Set: ", mean_squared_error(y_train, y_pred_train))
print("MSE on Test Set: ", mean_squared_error(y_test, y_pred_test))
\n """
from sklearn.metrics import mean_squared_error
train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)
metrics_df[metric] = [train_mse, test_mse]
st.session_state['metrics_df'] = metrics_df
elif metric == "Root Mean Squared Error (RMSE)":
if "Root Mean Squared Error (RMSE)" not in st.session_state.models_with_eval[str(model)]:
st.session_state.models_with_eval[str(model)].append("Root Mean Squared Error (RMSE)")
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state.all_the_process += f"""
# Evaluation - RMSE
from sklearn.metrics import mean_squared_error
print("RMSE on Train Set: ", np.sqrt(mean_squared_error(y_train, y_pred_train)))
print("RMSE on Validation Set: ", np.sqrt(mean_squared_error(y_val, y_pred_val)))
print("RMSE on Test Set: ", np.sqrt(mean_squared_error(y_test, y_pred_test)))
\n """
from sklearn.metrics import mean_squared_error
train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
val_rmse = np.sqrt(mean_squared_error(y_val, y_pred_val))
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
metrics_df[metric] = [train_rmse, val_rmse, test_rmse]
st.session_state['metrics_df'] = metrics_df
else:
st.session_state.all_the_process += f"""
# Evaluation - RMSE
from sklearn.metrics import mean_squared_error
print("RMSE on Train Set: ", np.sqrt(mean_squared_error(y_train, y_pred_train)))
print("RMSE on Test Set: ", np.sqrt(mean_squared_error(y_test, y_pred_test)))
\n """
from sklearn.metrics import mean_squared_error
train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
metrics_df[metric] = [train_rmse, test_rmse]
st.session_state['metrics_df'] = metrics_df
elif metric == "R2 Score":
if "R2 Score" not in st.session_state.models_with_eval[str(model)]:
st.session_state.models_with_eval[str(model)].append("R2 Score")
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state.all_the_process += f"""
# Evaluation - R2 Score
from sklearn.metrics import r2_score
print("R2 Score on Train Set: ", r2_score(y_train, y_pred_train))
print("R2 Score on Validation Set: ", r2_score(y_val, y_pred_val))
print("R2 Score on Test Set: ", r2_score(y_test, y_pred_test))
\n """
from sklearn.metrics import r2_score
train_r2 = r2_score(y_train, y_pred_train)
val_r2 = r2_score(y_val, y_pred_val)
test_r2 = r2_score(y_test, y_pred_test)
metrics_df[metric] = [train_r2, val_r2, test_r2]
st.session_state['metrics_df'] = metrics_df
else:
st.session_state.all_the_process += f"""
# Evaluation - R2 Score
from sklearn.metrics import r2_score
print("R2 Score on Train Set: ", r2_score(y_train, y_pred_train))
print("R2 Score on Test Set: ", r2_score(y_test, y_pred_test))
\n """
from sklearn.metrics import r2_score
train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)
metrics_df[metric] = [train_r2, test_r2]
st.session_state['metrics_df'] = metrics_df
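# Note: RMSE is derived here as np.sqrt(MSE), which works on any scikit-learn
# version; depending on your version it is also available directly, via
# mean_squared_error(..., squared=False) in older releases or
# sklearn.metrics.root_mean_squared_error from 1.4 onwards.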
# Show Evaluation Metric
if st.session_state.show_eval:
new_line()
col1, col2, col3 = st.columns([0.5, 1, 0.5])
st.markdown("### Evaluation Metric")
if st.session_state["split_sets"] == "Train, Validation, and Test":
st.session_state['metrics_df'].index = ['Train', 'Validation', 'Test']
st.write(st.session_state['metrics_df'])
elif st.session_state["split_sets"] == "Train and Test":
st.session_state['metrics_df'].index = ['Train', 'Test']
st.write(st.session_state['metrics_df'])
# Show Evaluation Metric Plot
new_line()
st.markdown("### Evaluation Metric Plot")
st.line_chart(st.session_state['metrics_df'])
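# The line chart plots each metric column across the split labels, so a widening
# gap between the Train and Test lines is a quick visual hint of overfitting.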
# Show ROC Curve as plot
if "AUC Score" in evaluation_metric:
from sklearn.metrics import RocCurveDisplay  # plot_roc_curve was removed in scikit-learn 1.2
st.markdown("### ROC Curve")
new_line()
if st.session_state["split_sets"] == "Train, Validation, and Test":
# Show the ROC curve plot without any columns
col1, col2, col3 = st.columns([0.2, 1, 0.2])
fig, ax = plt.subplots()
RocCurveDisplay.from_estimator(model, X_train, y_train, ax=ax)
RocCurveDisplay.from_estimator(model, X_val, y_val, ax=ax)
RocCurveDisplay.from_estimator(model, X_test, y_test, ax=ax)
ax.legend(['Train', 'Validation', 'Test'])
col2.pyplot(fig)
elif st.session_state["split_sets"] == "Train and Test":
# Show the ROC curve plot without any columns
col1, col2, col3 = st.columns([0.2, 1, 0.2])
fig, ax = plt.subplots()
RocCurveDisplay.from_estimator(model, X_train, y_train, ax=ax)
RocCurveDisplay.from_estimator(model, X_test, y_test, ax=ax)
ax.legend(['Train', 'Test'])
col2.pyplot(fig)
# Show Confusion Matrix as plot
if st.session_state['problem_type'] == "Classification":
from sklearn.metrics import ConfusionMatrixDisplay  # plot_confusion_matrix was removed in scikit-learn 1.2
st.markdown("### Confusion Matrix")
new_line()
col1, col2, col3 = st.columns([0.2,1,0.2])
fig, ax = plt.subplots()
ConfusionMatrixDisplay.from_predictions(y_test, y_pred_test, ax=ax)
col2.pyplot(fig)
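# The confusion matrix is computed on the held-out test split only, since that
# is the split meant to approximate performance on unseen data.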
st.divider()
col1, col2, col3, col4 = st.columns(4, gap='small')
if col1.button("🎬 Show df", use_container_width=True):
new_line()
st.subheader(" 🎬 Show The Dataframe")
st.write("The dataframe is the dataframe that is used on this application to build the Machine Learning model. You can see the dataframe below πŸ‘‡")
new_line()
st.dataframe(st.session_state.df, use_container_width=True)
st.session_state.df.to_csv("df.csv", index=False)
df_file = open("df.csv", "rb")
df_bytes = df_file.read()
if col2.download_button("πŸ“Œ Download df", df_bytes, "df.csv", key='save_df', use_container_width=True):
st.success("Downloaded Successfully!")
if col3.button("πŸ’» Code", use_container_width=True):
new_line()
st.subheader("πŸ’» The Code")
st.write("The code below is the code that is used to build the model. It is the code that is generated by the app. You can copy the code and use it in your own project πŸ˜‰")
new_line()
st.code(st.session_state.all_the_process, language='python')
if col4.button("β›” Reset", use_container_width=True):
new_line()
st.subheader("β›” Reset")
st.write("Click the button below to reset the app and start over again")
new_line()
st.session_state.reset_1 = True
if st.session_state.reset_1:
col1, col2, col3 = st.columns(3)
if col2.button("β›” Reset", use_container_width=True, key='reset'):
st.session_state.clear()
st.experimental_rerun()
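# Note: the reset is deliberately two-step. The first "Reset" click only arms
# st.session_state.reset_1; the confirmation click above then clears the whole
# session and reruns the script from the top.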