# Listing metadata: 41 lines, 1.5 KiB, plaintext, executable file.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling. The order is kept as in the original script: seaborn defaults
# first, then the figure size, then the ggplot style sheet on top.
sns.set()
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')

# Read the csv file
dataset = pd.read_csv('CLV.csv')

# Explore the dataset. The results are printed explicitly because a bare
# expression is silently discarded when this file is run as a script.
print(dataset.head())  # first 5 rows
print(len(dataset))  # number of rows

# Descriptive statistics of the dataset
print(dataset.describe().transpose())

# Feature matrix used by the clustering sections below, which index it as a
# 2-D array (X[mask, 0] and X[mask, 1]).
# NOTE(review): assumes the first two columns of CLV.csv are the customer's
# annual income and annual spend, matching the axis labels used when the
# clusters are plotted - confirm against the actual file.
X = dataset.iloc[:, [0, 1]].to_numpy()
# Using the elbow method to find the optimum number of clusters
from sklearn.cluster import KMeans

# `X` is the feature matrix this script clusters; it must already be bound to
# a 2-D numeric array before this section runs.
cluster_counts = range(1, 11)  # candidate values of k: 1..10

# Within-cluster sum of squares (the fitted model's inertia_) for each k.
wcss = []
for i in cluster_counts:
    km = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
    km.fit(X)
    wcss.append(km.inertia_)

# Plot inertia against k; the "elbow" of the curve suggests a suitable k.
plt.plot(cluster_counts, wcss)
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('wcss')
plt.show()
# Fitting kmeans to the dataset with k=4
# `X` is the feature matrix this script clusters; it must already be bound to
# a 2-D numeric array before this section runs.
km4 = KMeans(n_clusters=4, init='k-means++', max_iter=300, n_init=10, random_state=0)
y_means = km4.fit_predict(X)  # cluster index (0..3) assigned to each row of X

# Visualizing the clusters for k=4: one scatter series per cluster, plotting
# column 0 of X on the x-axis and column 1 on the y-axis.
cluster_styles = (
    ('purple', 'Cluster1'),
    ('blue', 'Cluster2'),
    ('green', 'Cluster3'),
    ('cyan', 'Cluster4'),
)
for cluster_index, (colour, cluster_label) in enumerate(cluster_styles):
    in_cluster = y_means == cluster_index
    plt.scatter(X[in_cluster, 0], X[in_cluster, 1], s=50, c=colour, label=cluster_label)

# Overlay the fitted centroids as larger, semi-transparent red squares.
plt.scatter(km4.cluster_centers_[:, 0], km4.cluster_centers_[:, 1], s=200, marker='s', c='red', alpha=0.7, label='Centroids')

plt.title('Customer segments')
plt.xlabel('Annual income of customer')
plt.ylabel('Annual spend from customer on site')
plt.legend()
plt.show()