"""Gradio dashboard for a pre-trained K-Means customer segmentation.

At import time the script loads a pickled K-Means model and scaler, reads
``Customers.csv``, derives two family/income features, assigns every customer
to a cluster, and launches a Gradio interface that shows a per-cluster summary
table plus two plots.
"""
import pickle

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# SECURITY: pickle.load executes arbitrary code embedded in the file. Only
# load model artifacts that come from a trusted source.
with open('kmeans_model.pkl', 'rb') as model_file:
    kmeans = pickle.load(model_file)

with open('scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

dataset_path = 'Customers.csv'
data = pd.read_csv(dataset_path)

# Derived features; the small epsilon avoids division by zero for rows whose
# annual income is 0.
data['Family_Income_Product'] = data['Family Size'] * data['Annual Income ($)']
data['Family_Income_Ratio'] = data['Family Size'] / (data['Annual Income ($)'] + 1e-5)

features = data[['Annual Income ($)', 'Family Size', 'Family_Income_Product', 'Family_Income_Ratio']]
features_scaled = scaler.transform(features)

clusters = kmeans.predict(features_scaled)
data['Cluster'] = clusters


def plot_cluster_distribution(output_path='/kaggle/working/cluster_distribution.png'):
    """Save a count plot of customers per cluster and return the image path.

    Args:
        output_path: Where the PNG is written. Defaults to the original
            Kaggle working-directory location.

    Returns:
        The path the image was written to.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.countplot(data=data, x='Cluster', palette='viridis', ax=ax)
        ax.set_title('Customer Distribution Across Clusters')
        ax.set_xlabel('Cluster')
        ax.set_ylabel('Number of Customers')
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Close the figure so repeated requests do not accumulate open figures.
        plt.close(fig)
    return output_path


def plot_spending_score_distribution(output_path='/kaggle/working/spending_score_distribution.png'):
    """Save a box plot of spending score per cluster and return the image path.

    Args:
        output_path: Where the PNG is written. Defaults to the original
            Kaggle working-directory location.

    Returns:
        The path the image was written to.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.boxplot(data=data, x='Cluster', y='Spending Score (1-100)', palette='viridis', ax=ax)
        ax.set_title('Spending Score Distribution Across Clusters')
        ax.set_xlabel('Cluster')
        ax.set_ylabel('Spending Score (1-100)')
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Close the figure so repeated requests do not accumulate open figures.
        plt.close(fig)
    return output_path


def _aggregate_by_cluster():
    """Return per-cluster mean/std of income, family size and spending score."""
    return data.groupby('Cluster').agg({
        'Annual Income ($)': ['mean', 'std'],
        'Family Size': ['mean', 'std'],
        'Spending Score (1-100)': ['mean', 'std'],
    }).reset_index()


def cluster_summary():
    """Return the per-cluster summary statistics as a nested dict.

    The aggregated frame has two-level columns, so the dict keys are
    ``(column, statistic)`` tuples.
    """
    return _aggregate_by_cluster().to_dict()


def _cluster_summary_table():
    """Return the per-cluster summary as a DataFrame with flat column names."""
    summary = _aggregate_by_cluster()
    # Collapse the two-level (column, statistic) header into single strings,
    # e.g. ('Family Size', 'mean') -> 'Family Size mean', ('Cluster', '') -> 'Cluster'.
    summary.columns = [
        ' '.join(part for part in column if part) for column in summary.columns
    ]
    return summary


def build_dashboard():
    """Produce one value per interface output: the table and both plot images.

    Returns:
        A 3-tuple ``(summary_dataframe, cluster_plot_path, spending_plot_path)``
        in the same order as the ``outputs`` list of the interface.
    """
    return (
        _cluster_summary_table(),
        plot_cluster_distribution(),
        plot_spending_score_distribution(),
    )


iface = gr.Interface(
    # The callback must return one value per output component; the original
    # callback returned only the summary dict, leaving both images unfilled.
    fn=build_dashboard,
    inputs=[],
    outputs=[
        gr.Dataframe(label="Cluster Summary"),
        gr.Image(label="Customer Distribution Across Clusters"),
        gr.Image(label="Spending Score Distribution Across Clusters")
    ],
    # NOTE(review): with no input components, confirm that live=True gives the
    # intended refresh behaviour in the deployed Gradio version.
    live=True
)

# Launch the interface
iface.launch()