# Program 5
# Cluster analysis using k-means algorithm for a given customer data set.
# Dataset Link: https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans

# Path to the customer CSV. It is empty in this script and has to be filled in
# with the location of the downloaded dataset before the script can run.
DATA_PATH = r""

# The two columns used as clustering features.
# NOTE(review): these names must match the CSV header exactly -- confirm
# against the copy of the dataset being loaded.
FEATURE_COLUMNS = ["Annual_Income_(k$)", "Spending_Score"]

# Candidate cluster counts evaluated for the elbow plot (1 through 10).
ELBOW_K_VALUES = range(1, 11)

# Seed shared by every KMeans fit so repeated runs give the same result.
RANDOM_STATE = 42

# One colour per cluster. The length of this list is also the number of
# clusters fitted below, so the model and the plot cannot drift apart.
CLUSTER_COLORS = ["red", "blue", "green", "cyan", "magenta"]

data = pd.read_csv(DATA_PATH)
# Preview of the first rows; the value is only displayed in an interactive
# session (it is not printed when run as a plain script).
data.head()

X = data[FEATURE_COLUMNS]

# --- Elbow method: fit k-means for each candidate k and record its inertia ---
wcss = []  # Within-Cluster Sum of Squares
for i in ELBOW_K_VALUES:
    kmeans = KMeans(n_clusters=i, init="k-means++", random_state=RANDOM_STATE)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

plt.figure(figsize=(8, 4))
plt.plot(ELBOW_K_VALUES, wcss, marker="o")
plt.title("The Elbow Method")
plt.xlabel("Number of Clusters")
plt.ylabel("WCSS")
plt.grid(True)
plt.show()

# --- Final model: one cluster per entry in CLUSTER_COLORS (five) ---
kmeans = KMeans(
    n_clusters=len(CLUSTER_COLORS),
    init="k-means++",
    random_state=RANDOM_STATE,
)
y_kmeans = kmeans.fit_predict(X)

# --- Scatter plot of the segments, one colour per predicted label ---
plt.figure(figsize=(10, 6))
feature_values = X.values
for cluster_idx, color in enumerate(CLUSTER_COLORS):
    members = feature_values[y_kmeans == cluster_idx]
    plt.scatter(
        members[:, 0],
        members[:, 1],
        s=100,
        c=color,
        # Legend labels are 1-based while predicted labels are 0-based.
        label=f"Cluster {cluster_idx + 1}",
    )

# Cluster Centers
plt.scatter(
    kmeans.cluster_centers_[:, 0],
    kmeans.cluster_centers_[:, 1],
    s=300,
    c="yellow",
    label="Centroids",
)

plt.title("Customer Segments")
plt.xlabel("Annual Income (k$)")
plt.ylabel("Spending Score (1-100)")
plt.legend()
plt.grid(True)
plt.show()