File size: 3,949 Bytes
92b63f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import streamlit as st
import pandas as pd
from analysis import univariate_analysis, BivariateAnalysis, multivariate_analysis

def eda():
    """Render the Exploratory Data Analysis page of the Streamlit app.

    Loads the training CSV, drops the ``CustomerID`` column and rows that are
    entirely empty, shows a preview of the data, and then lets the user pick
    one of three analysis modes. Plotting is delegated to
    ``univariate_analysis``, ``BivariateAnalysis`` and
    ``multivariate_analysis`` imported from the ``analysis`` module.

    Takes no arguments and returns ``None``; all output goes through
    Streamlit widgets.
    """
    st.image("/home/sarath_kumar/customer_churn_predict/assets/eda.webp", width=300)
    st.title("Exploratory Data Analysis")

    data = pd.read_csv("extracted/customer_churn_dataset-training-master.csv")
    data = data.drop(columns="CustomerID")
    # Only rows where *every* field is missing are removed.
    data = data.dropna(axis=0, how="all")

    st.header("Dataset Overview")
    st.dataframe(data.head())

    st.subheader("Select Analysis Type")
    analysis_type = st.selectbox(
        "Select Analysis Type",
        ["Univariate Analysis", "Bivariate Analysis", "Multivariate Analysis"],
    )

    if analysis_type == "Univariate Analysis":
        st.subheader("Univariate Analysis")
        column = st.selectbox(
            "Select a column for univariate analysis", data.columns, key="uni"
        )
        plot_type = st.selectbox(
            "Select plot type",
            ["Histogram", "Boxplot", "Pie Chart", "Bar Plot"],
            key="uni_plot",
        )

        if st.button("Generate Univariate Plot", key="uni_button"):
            if column:
                univariate_analysis(data, column, plot_type)
            else:
                st.warning("Please select a column for analysis.")

    elif analysis_type == "Bivariate Analysis":
        st.subheader("Bivariate Analysis")
        column_x = st.selectbox("Select X-axis column", data.columns, key="bi_x")
        column_y = st.selectbox("Select Y-axis column", data.columns, key="bi_y")
        plot_type = st.selectbox(
            "Select plot type", ["Scatter Plot", "Bar Plot", "Boxplot"], key="bi_plot"
        )

        if st.button("Generate Bivariate Plot", key="bi_button"):
            # Columns read from CSV are never ``category`` dtype (text comes in
            # as ``object``), so anything that is not numeric is treated as
            # categorical instead of relying on ``is_categorical_dtype``.
            x_is_numeric = pd.api.types.is_numeric_dtype(data[column_x])
            y_is_numeric = pd.api.types.is_numeric_dtype(data[column_y])

            if x_is_numeric and y_is_numeric:
                analysis = BivariateAnalysis()
                analysis.numerical_vs_numerical(data, column_x, column_y, plot_type)
            elif x_is_numeric or y_is_numeric:
                # Exactly one column is numeric. Arguments are passed in X, Y
                # order regardless of which one is numeric.
                # NOTE(review): confirm numerical_vs_categorical accepts the
                # categorical column in either position.
                analysis = BivariateAnalysis()
                analysis.numerical_vs_categorical(data, column_x, column_y, plot_type)
            else:
                # Two non-numeric columns: no plot is attempted.
                st.warning(
                    "Please select at least one numerical column for analysis. "
                    "Plots of two non-numerical columns are not supported."
                )

    elif analysis_type == "Multivariate Analysis":
        # Multivariate plots use the preprocessed dataset, not the raw CSV.
        # NOTE(review): this directory is spelled "customer_chrun_prediction"
        # while the image path above uses "customer_churn_predict" - confirm
        # the path is correct.
        data = pd.read_csv(
            "/home/sarath_kumar/customer_chrun_prediction/processed_data/processed_data.csv"
        )
        st.subheader("Multivariate Analysis")
        columns = st.multiselect(
            "Select columns for multivariate analysis", data.columns
        )

        plot_type = st.selectbox(
            "Select plot type for multivariate analysis",
            ["Correlation Heatmap", "Scatter Matrix"],
        )

        if st.button("Generate Multivariate Plot", key="multi_button"):
            if columns:
                multivariate_analysis(data, columns, plot_type)
            else:
                st.warning("Please select columns for multivariate analysis.")

if __name__ == "__main__":
    # Render the EDA page only when this file is executed as a script,
    # not when it is imported as a module.
    eda()