"""Segment customers from ``CLV.csv`` with k-means clustering.

The script:

1. loads ``CLV.csv`` into a DataFrame,
2. plots the within-cluster sum of squares (WCSS) for k = 1..10 so the
   "elbow" can be read off the curve,
3. fits k-means with k = 4 and draws the resulting clusters together with
   their centroids.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans

# Plot styling
sns.set()  # for plot styling
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')

# Read the csv file
dataset = pd.read_csv('CLV.csv')

# Explore the dataset. These are bare expressions: their results are shown
# only when run interactively (notebook / REPL) and are discarded when the
# file is executed as a plain script.
dataset.head()  # first 5 rows
len(dataset)  # number of rows
# descriptive statistics of the dataset
dataset.describe().transpose()

# Feature matrix used for clustering and plotting.
# NOTE(review): the original script used ``X`` without ever defining it
# (NameError at the first ``km.fit(X)``). The plots below index X[:, 0] and
# X[:, 1] and label them income / spend, so this assumes the first two
# columns of CLV.csv are annual income and annual spend -- confirm against
# the actual file.
X = dataset.iloc[:, [0, 1]].values

# Settings shared by every KMeans fit below; the fixed random_state makes
# the runs repeatable.
KMEANS_OPTIONS = dict(init='k-means++', max_iter=300, n_init=10, random_state=0)

# Using the elbow method to find the optimum number of clusters
wcss = []
for i in range(1, 11):
    km = KMeans(n_clusters=i, **KMEANS_OPTIONS)
    km.fit(X)
    wcss.append(km.inertia_)  # inertia_ is the WCSS of the fitted model

plt.plot(range(1, 11), wcss)
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('wcss')
plt.show()

# Fitting kmeans to the dataset with k=4
km4 = KMeans(n_clusters=4, **KMEANS_OPTIONS)
y_means = km4.fit_predict(X)

# Visualizing the clusters for k=4: one scatter call per cluster label so
# each cluster gets its own colour and legend entry.
cluster_styles = [
    ('purple', 'Cluster1'),
    ('blue', 'Cluster2'),
    ('green', 'Cluster3'),
    ('cyan', 'Cluster4'),
]
for cluster_id, (color, label) in enumerate(cluster_styles):
    members = y_means == cluster_id
    plt.scatter(X[members, 0], X[members, 1], s=50, c=color, label=label)

# Centroids are drawn last so they sit on top of the cluster points.
plt.scatter(
    km4.cluster_centers_[:, 0],
    km4.cluster_centers_[:, 1],
    s=200,
    marker='s',
    c='red',
    alpha=0.7,
    label='Centroids',
)
plt.title('Customer segments')
plt.xlabel('Annual income of customer')
plt.ylabel('Annual spend from customer on site')
plt.legend()
plt.show()