Viraj2307 committed on
Commit
dc11300
·
1 Parent(s): 8c08a5a

Added app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -0
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app for RFM-based customer segmentation.

Workflow, top to bottom:

1. Upload a transactions CSV through the sidebar.
2. Keep United Kingdom rows with a positive quantity and a customer id.
3. Show summary statistics and a transactions-per-month bar chart.
4. Build a Recency / Frequency / Monetary (RFM) table per customer.
5. Cluster customers with K-Means on the standardised RFM features.
6. Offer the resulting RFM table as a CSV download.

The script runs top to bottom on every Streamlit rerun; ``st.stop()`` is used
to end the run early when there is nothing meaningful to show.
"""

# Some of these imports are not used below; they are kept because they were
# part of the original import block.
import datetime as dt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st
from sklearn.cluster import KMeans

# Columns the analysis below reads from the uploaded CSV.
REQUIRED_COLUMNS = [
    "InvoiceNo",
    "StockCode",
    "Quantity",
    "InvoiceDate",
    "UnitPrice",
    "CustomerID",
    "Country",
]

# Feature columns fed to the clustering step.
RFM_FEATURES = ["Recency", "Frequency", "Monetary"]

# Set the page configuration
st.set_page_config(page_title="Customer Segmentation", layout="wide")

# Title and description
st.title("🛒 Advanced Customer Segmentation App")
st.markdown("""
This application allows you to perform **Customer Segmentation** using RFM analysis and clustering.
Upload your dataset, analyze the metrics, and visualize customer behaviors interactively.
""")

# Sidebar for uploading data
st.sidebar.header("Upload Dataset")
uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type=["csv"])

if uploaded_file is None:
    st.sidebar.warning("Please upload a CSV file to start!")
    st.stop()

# Identifier columns are read as strings so they are not subject to numeric
# type inference. The original dtype mapping named "InvoiceID", but the column
# this script actually reads is "InvoiceNo".
df = pd.read_csv(
    uploaded_file,
    encoding="ISO-8859-1",
    dtype={"CustomerID": str, "InvoiceNo": str},
)
st.sidebar.success("Dataset uploaded successfully!")

# Fail with a readable message instead of a KeyError traceback further down.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    st.error(
        "The uploaded file is missing required column(s): "
        + ", ".join(missing_columns)
    )
    st.stop()

# Data cleaning and preprocessing
st.header("🧹 Data Cleaning and Preprocessing")

# Line-item revenue.
df["Amount"] = df["Quantity"] * df["UnitPrice"]
st.markdown("### Initial Data Preview")
st.write(df.head())

# Keep UK rows with a positive quantity and a known customer. The explicit
# .copy() makes the result an independent frame, so the column assignments
# below do not write into a slice of the uploaded data.
uk_purchases = (df["Country"] == "United Kingdom") & (df["Quantity"] > 0)
df = df.loc[uk_purchases].dropna(subset=["CustomerID"]).copy()

if df.empty:
    st.warning(
        "No United Kingdom rows with a positive quantity and a CustomerID "
        "were found in this file."
    )
    st.stop()

df["InvoiceDate"] = pd.to_datetime(df["InvoiceDate"])
df["date"] = df["InvoiceDate"].dt.date

# Cleaned data preview
st.markdown("### Cleaned Data Overview")
st.write(df.describe())

# Summary statistics
st.subheader("📊 Summary Statistics")
per_customer_totals = df.groupby("CustomerID")[["Quantity", "Amount"]].sum()
metrics = {
    "Number of Invoices": df["InvoiceNo"].nunique(),
    "Number of Products Bought": df["StockCode"].nunique(),
    "Number of Customers": df["CustomerID"].nunique(),
    "Average Quantity per Customer": round(per_customer_totals["Quantity"].mean(), 0),
    "Average Revenue per Customer (£)": round(per_customer_totals["Amount"].mean(), 2),
}
st.write(pd.DataFrame(list(metrics.items()), columns=["Metric", "Value"]))

# Monthly transactions analysis
st.subheader("📅 Monthly Transactions Analysis")
df["month"] = df["InvoiceDate"].dt.month
# Row count per calendar month number. A frame with explicitly named columns
# gives the chart the intended "Month" / "Transactions" axis titles.
monthly_counts = (
    df.groupby("month")
    .size()
    .rename("Transactions")
    .rename_axis("Month")
    .reset_index()
)

fig_monthly = px.bar(
    monthly_counts,
    x="Month",
    y="Transactions",
    title="Transactions Per Month",
)
st.plotly_chart(fig_monthly)

# RFM analysis
st.header("📈 RFM Analysis")

# Recency is measured against the most recent invoice day in the data, not a
# fixed calendar date, so any date range gives non-negative recencies.
now = df["InvoiceDate"].max().normalize()

# One row per customer: last purchase date, number of distinct invoices, and
# total spend.
rfm = (
    df.groupby("CustomerID")
    .agg(
        date=("date", "max"),
        Frequency=("InvoiceNo", "nunique"),
        Monetary=("Amount", "sum"),
    )
    .reset_index()
)
# Insert Recency right after "date" so the column order is
# CustomerID, date, Recency, Frequency, Monetary.
rfm.insert(2, "Recency", (now - pd.to_datetime(rfm["date"])).dt.days)

st.write("### RFM Data")
st.write(rfm.head())

# Visualize RFM distributions
fig_rfm = px.scatter_3d(
    rfm,
    x="Recency",
    y="Frequency",
    z="Monetary",
    color="Monetary",
    size="Monetary",
    title="RFM Scatter Plot",
)
st.plotly_chart(fig_rfm)

# K-Means clustering
# NOTE(review): the emoji in this header was not recoverable from the source
# text; a pushpin is used as a stand-in - confirm the intended glyph.
st.header("📍 K-Means Clustering")
st.sidebar.subheader("Clustering Parameters")
num_clusters = st.sidebar.slider("Number of Clusters", 2, 10, value=4)

# K-Means cannot fit more clusters than there are samples.
if len(rfm) < num_clusters:
    st.warning(
        f"Only {len(rfm)} customer(s) available; choose at most that many clusters."
    )
    st.stop()

# K-Means relies on Euclidean distance, so the features are standardised to
# zero mean and unit variance; otherwise the column with the largest numeric
# range dominates the distance. A zero spread is replaced by 1 to avoid
# dividing by zero when a feature is constant.
features = rfm[RFM_FEATURES].astype(float)
spread = features.std(ddof=0).replace(0, 1.0)
scaled_features = (features - features.mean()) / spread

# n_init is set explicitly so results do not depend on the installed
# scikit-learn version's default.
kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=42)
rfm["Cluster"] = kmeans.fit_predict(scaled_features)

# Cluster visualization (axes stay in the original, unscaled units)
fig_cluster = px.scatter_3d(
    rfm,
    x="Recency",
    y="Frequency",
    z="Monetary",
    color="Cluster",
    title=f"Customer Segmentation with {num_clusters} Clusters",
    symbol="Cluster",
    size="Monetary",
)
st.plotly_chart(fig_cluster)

# Export data
st.header("📤 Export Processed Data")
# A download button sends the CSV to the user's browser. Writing the file to
# the app's working directory would leave it on the machine running the app.
st.download_button(
    label="Export RFM Data",
    data=rfm.to_csv(index=False).encode("utf-8"),
    file_name="rfm_data.csv",
    mime="text/csv",
)

st.markdown("### Enjoy exploring your customer data! 🚀")