Spaces:
Runtime error
Runtime error
File size: 3,949 Bytes
92b63f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import streamlit as st
import pandas as pd
from analysis import univariate_analysis, BivariateAnalysis, multivariate_analysis
def eda():
    """Render the "Exploratory Data Analysis" Streamlit page.

    Shows a banner image and a preview of the training dataset, then lets the
    user pick univariate, bivariate or multivariate analysis and delegates
    the plotting to the helpers imported from ``analysis``.

    Takes no arguments and returns ``None``; all output goes through
    Streamlit calls.
    """
    # NOTE(review): absolute path into a developer home directory -- it will
    # not exist on other machines; confirm the intended location.
    st.image(
        "/home/sarath_kumar/customer_churn_predict/assets/eda.webp",
        width=300,
    )
    st.title("Exploratory Data Analysis")

    data = pd.read_csv("extracted/customer_churn_dataset-training-master.csv")
    data.drop("CustomerID", axis=1, inplace=True)
    # Only rows in which every value is missing are removed.
    data.dropna(axis=0, inplace=True, how="all")

    st.header("Dataset Overview")
    st.dataframe(data.head())

    st.subheader("Select Analysis Type")
    analysis_type = st.selectbox(
        "Select Analysis Type",
        ["Univariate Analysis", "Bivariate Analysis", "Multivariate Analysis"],
    )

    if analysis_type == "Univariate Analysis":
        st.subheader("Univariate Analysis")
        column = st.selectbox(
            "Select a column for univariate analysis", data.columns, key="uni"
        )
        plot_type = st.selectbox(
            "Select plot type",
            ["Histogram", "Boxplot", "Pie Chart", "Bar Plot"],
            key="uni_plot",
        )
        if st.button("Generate Univariate Plot", key="uni_button"):
            if column:
                univariate_analysis(data, column, plot_type)
            else:
                st.warning("Please select a column for analysis.")

    elif analysis_type == "Bivariate Analysis":
        st.subheader("Bivariate Analysis")
        column_x = st.selectbox("Select X-axis column", data.columns, key="bi_x")
        column_y = st.selectbox("Select Y-axis column", data.columns, key="bi_y")
        plot_type = st.selectbox(
            "Select plot type",
            ["Scatter Plot", "Bar Plot", "Boxplot"],
            key="bi_plot",
        )
        if st.button("Generate Bivariate Plot", key="bi_button"):
            # Classify each column once.  Columns loaded by read_csv are
            # never of pandas "category" dtype (text comes back as object),
            # so every non-numeric column is treated as categorical instead
            # of testing with the deprecated is_categorical_dtype.
            x_is_numeric = pd.api.types.is_numeric_dtype(data[column_x])
            y_is_numeric = pd.api.types.is_numeric_dtype(data[column_y])

            if x_is_numeric and y_is_numeric:
                analysis = BivariateAnalysis()
                analysis.numerical_vs_numerical(
                    data, column_x, column_y, plot_type
                )
            elif x_is_numeric or y_is_numeric:
                # Exactly one numeric column; the columns are passed in the
                # X/Y order the user selected.
                analysis = BivariateAnalysis()
                analysis.numerical_vs_categorical(
                    data, column_x, column_y, plot_type
                )
            else:
                # Two non-numeric columns: no plot is attempted.
                st.warning(
                    "Please select at least one numerical column for analysis. "
                    "Two non-numerical columns are not supported for this plot."
                )

    elif analysis_type == "Multivariate Analysis":
        # This mode reads a separate, pre-processed file instead of the
        # training CSV loaded above.
        # NOTE(review): absolute path, and its project directory
        # ("customer_chrun_prediction") differs from the one used for the
        # banner image ("customer_churn_predict") -- confirm which is right.
        data = pd.read_csv(
            "/home/sarath_kumar/customer_chrun_prediction/processed_data/processed_data.csv"
        )
        st.subheader("Multivariate Analysis")
        columns = st.multiselect(
            "Select columns for multivariate analysis", data.columns
        )
        # Let the user choose which kind of multivariate plot to draw.
        plot_type = st.selectbox(
            "Select plot type for multivariate analysis",
            ["Correlation Heatmap", "Scatter Matrix"],
        )
        if st.button("Generate Multivariate Plot", key="multi_button"):
            if columns:
                multivariate_analysis(data, columns, plot_type)
            else:
                st.warning("Please select columns for multivariate analysis.")
# Render the page only when this file is executed as a script, not when it
# is imported by another module.
if __name__ == "__main__":
    eda()
|