Spaces:
Runtime error
Runtime error
from pathlib import Path

import pandas as pd
import streamlit as st

from analysis import univariate_analysis, BivariateAnalysis, multivariate_analysis
def _is_categorical_column(series):
    """Return True when *series* holds label-like (categorical) data.

    Columns loaded with ``pd.read_csv`` arrive as object/string dtype rather
    than pandas' ``category`` dtype, so all three are treated as categorical.
    """
    dtype = series.dtype
    return (
        isinstance(dtype, pd.CategoricalDtype)
        or pd.api.types.is_object_dtype(dtype)
        or pd.api.types.is_string_dtype(dtype)
    )


def _plot_bivariate(data, column_x, column_y, plot_type):
    """Dispatch a bivariate plot according to the dtypes of the two columns."""
    x_numeric = pd.api.types.is_numeric_dtype(data[column_x])
    y_numeric = pd.api.types.is_numeric_dtype(data[column_y])
    x_supported = x_numeric or _is_categorical_column(data[column_x])
    y_supported = y_numeric or _is_categorical_column(data[column_y])

    if x_numeric and y_numeric:
        BivariateAnalysis().numerical_vs_numerical(
            data, column_x, column_y, plot_type
        )
    elif x_supported and y_supported:
        # At least one column is categorical. The original code routed every
        # such combination (including categorical vs. categorical) to
        # numerical_vs_categorical with the columns in this order, so that
        # routing is kept.
        # NOTE(review): confirm numerical_vs_categorical copes with two
        # categorical columns and with a categorical X / numerical Y pair.
        BivariateAnalysis().numerical_vs_categorical(
            data, column_x, column_y, plot_type
        )
    else:
        # Neither numeric nor categorical (e.g. datetime columns).
        st.warning("Please select numerical columns for analysis. Only numerical data types are supported for this plot.")


def eda():
    """Render the "Exploratory Data Analysis" Streamlit page.

    Loads the training CSV, drops the ``CustomerID`` column and rows that are
    entirely empty, shows the first rows, and then lets the user pick a
    univariate, bivariate or multivariate analysis. The chosen plot is
    produced by the helpers imported from ``analysis`` once the matching
    "Generate ..." button is pressed.
    """
    # The banner is purely decorative; skip it instead of failing the whole
    # page when the absolute path does not exist on the current machine.
    banner = Path("/home/sarath_kumar/customer_churn_predict/assets/eda.webp")
    if banner.is_file():
        st.image(str(banner), width=300)
    st.title("Exploratory Data Analysis")

    data = pd.read_csv("extracted/customer_churn_dataset-training-master.csv")
    data.drop("CustomerID", axis=1, inplace=True)
    data.dropna(axis=0, inplace=True, how="all")

    st.header("Dataset Overview")
    st.dataframe(data.head())

    st.subheader("Select Analysis Type")
    analysis_type = st.selectbox(
        "Select Analysis Type",
        ["Univariate Analysis", "Bivariate Analysis", "Multivariate Analysis"],
    )

    if analysis_type == "Univariate Analysis":
        st.subheader("Univariate Analysis")
        column = st.selectbox(
            "Select a column for univariate analysis", data.columns, key="uni"
        )
        plot_type = st.selectbox(
            "Select plot type",
            ["Histogram", "Boxplot", "Pie Chart", "Bar Plot"],
            key="uni_plot",
        )
        if st.button("Generate Univariate Plot", key="uni_button"):
            if column:
                univariate_analysis(data, column, plot_type)
            else:
                st.warning("Please select a column for analysis.")

    elif analysis_type == "Bivariate Analysis":
        st.subheader("Bivariate Analysis")
        column_x = st.selectbox("Select X-axis column", data.columns, key="bi_x")
        column_y = st.selectbox("Select Y-axis column", data.columns, key="bi_y")
        plot_type = st.selectbox(
            "Select plot type",
            ["Scatter Plot", "Bar Plot", "Boxplot"],
            key="bi_plot",
        )
        if st.button("Generate Bivariate Plot", key="bi_button"):
            _plot_bivariate(data, column_x, column_y, plot_type)

    elif analysis_type == "Multivariate Analysis":
        # Multivariate plots use the pre-processed dataset, not the raw one.
        # NOTE(review): this is an absolute path under a directory spelled
        # "customer_chrun_prediction", unlike the banner's
        # "customer_churn_predict" -- confirm the intended location.
        data = pd.read_csv("/home/sarath_kumar/customer_chrun_prediction/processed_data/processed_data.csv")
        st.subheader("Multivariate Analysis")
        columns = st.multiselect(
            "Select columns for multivariate analysis", data.columns
        )
        # Let the user choose which multivariate plot to draw.
        plot_type = st.selectbox(
            "Select plot type for multivariate analysis",
            ["Correlation Heatmap", "Scatter Matrix"],
        )
        if st.button("Generate Multivariate Plot", key="multi_button"):
            if columns:
                multivariate_analysis(data, columns, plot_type)
            else:
                st.warning("Please select columns for multivariate analysis.")
# Script entry point: render the EDA page when this file is run directly.
if __name__ == "__main__":
    eda()