# Sarathkumar1304ai's picture
# all files
# 92b63f0 verified
# raw
# history blame
# 3.95 kB
import streamlit as st
import pandas as pd
from analysis import univariate_analysis, BivariateAnalysis, multivariate_analysis
# Input locations, kept verbatim from the original code.
# NOTE(review): the two absolute paths are machine-specific and point at
# differently spelled project directories ("customer_churn_predict" vs
# "customer_chrun_prediction") -- confirm both exist on the deployment host.
_EDA_IMAGE_PATH = "/home/sarath_kumar/customer_churn_predict/assets/eda.webp"
_RAW_DATA_PATH = "extracted/customer_churn_dataset-training-master.csv"
_PROCESSED_DATA_PATH = "/home/sarath_kumar/customer_chrun_prediction/processed_data/processed_data.csv"


def _is_categorical(series) -> bool:
    """Return True when *series* has a category, object, or string dtype.

    ``pd.read_csv`` yields ``object`` dtype for text columns, never
    ``category``, so a bare categorical-dtype check would reject every
    text column loaded from the CSV files used here.
    """
    dtype = series.dtype
    return (
        isinstance(dtype, pd.CategoricalDtype)
        or pd.api.types.is_object_dtype(dtype)
        or pd.api.types.is_string_dtype(dtype)
    )


def _dispatch_bivariate(data, column_x, column_y, plot_type) -> None:
    """Route a pair of columns to the matching ``BivariateAnalysis`` method.

    Numeric/numeric pairs go to ``numerical_vs_numerical``; every other
    pairing of numeric and categorical columns goes to
    ``numerical_vs_categorical``. Any other dtype combination produces a
    Streamlit warning instead of a plot.
    """
    x_numeric = pd.api.types.is_numeric_dtype(data[column_x])
    y_numeric = pd.api.types.is_numeric_dtype(data[column_y])
    x_supported = x_numeric or _is_categorical(data[column_x])
    y_supported = y_numeric or _is_categorical(data[column_y])

    if not (x_supported and y_supported):
        st.warning(
            "Unsupported column types for bivariate analysis. "
            "Please select numerical or categorical columns."
        )
        return

    analysis = BivariateAnalysis()
    if x_numeric and y_numeric:
        analysis.numerical_vs_numerical(data, column_x, column_y, plot_type)
    else:
        # NOTE(review): categorical/categorical and categorical/numerical pairs
        # are passed to numerical_vs_categorical with (x, y) in the selected
        # order, as the original dispatch did -- confirm the helper accepts
        # these combinations.
        analysis.numerical_vs_categorical(data, column_x, column_y, plot_type)


def eda() -> None:
    """Render the "Exploratory Data Analysis" Streamlit page.

    Shows a banner image and a preview of the raw churn dataset (with the
    ``CustomerID`` column and fully empty rows removed), then lets the user
    pick univariate, bivariate, or multivariate analysis and generate the
    chosen plot through the helpers imported from ``analysis``.
    """
    st.image(_EDA_IMAGE_PATH, width=300)
    st.title("Exploratory Data Analysis")

    data = pd.read_csv(_RAW_DATA_PATH)
    data.drop("CustomerID", axis=1, inplace=True)
    # how="all" removes only rows in which every value is missing.
    data.dropna(axis=0, inplace=True, how="all")

    st.header("Dataset Overview")
    st.dataframe(data.head())

    st.subheader("Select Analysis Type")
    analysis_type = st.selectbox(
        "Select Analysis Type",
        ["Univariate Analysis", "Bivariate Analysis", "Multivariate Analysis"],
    )

    if analysis_type == "Univariate Analysis":
        st.subheader("Univariate Analysis")
        column = st.selectbox("Select a column for univariate analysis", data.columns, key="uni")
        plot_type = st.selectbox(
            "Select plot type",
            ["Histogram", "Boxplot", "Pie Chart", "Bar Plot"],
            key="uni_plot",
        )
        if st.button("Generate Univariate Plot", key="uni_button"):
            if column:
                univariate_analysis(data, column, plot_type)
            else:
                st.warning("Please select a column for analysis.")

    elif analysis_type == "Bivariate Analysis":
        st.subheader("Bivariate Analysis")
        column_x = st.selectbox("Select X-axis column", data.columns, key="bi_x")
        column_y = st.selectbox("Select Y-axis column", data.columns, key="bi_y")
        plot_type = st.selectbox(
            "Select plot type",
            ["Scatter Plot", "Bar Plot", "Boxplot"],
            key="bi_plot",
        )
        if st.button("Generate Bivariate Plot", key="bi_button"):
            _dispatch_bivariate(data, column_x, column_y, plot_type)

    elif analysis_type == "Multivariate Analysis":
        # The multivariate view reads the processed dataset, not the raw one
        # previewed above.
        data = pd.read_csv(_PROCESSED_DATA_PATH)
        st.subheader("Multivariate Analysis")
        columns = st.multiselect("Select columns for multivariate analysis", data.columns)
        # Let the user choose which multivariate plot to draw.
        plot_type = st.selectbox(
            "Select plot type for multivariate analysis",
            ["Correlation Heatmap", "Scatter Matrix"],
        )
        if st.button("Generate Multivariate Plot", key="multi_button"):
            if columns:
                multivariate_analysis(data, columns, plot_type)
            else:
                st.warning("Please select columns for multivariate analysis.")
# Render the EDA page when this file is executed directly as a script.
if __name__ == "__main__":
    eda()