k-means clustering algorithm code example
Example 1: k-means clustering in Python (scikit-learn)
from sklearn.cluster import KMeans
# Build the estimator: random initialisation, 3 clusters, fixed random_state.
kmeans = KMeans(
    n_clusters=3,
    init="random",
    n_init=10,
    max_iter=300,
    random_state=42,
)

# NOTE: x_train is not defined in this snippet; it must already exist.
kmeans.fit(x_train)

# Report the fitted attributes, one per line: inertia_, cluster_centers_,
# n_iter_ and the first five entries of labels_.
print(
    kmeans.inertia_,
    kmeans.cluster_centers_,
    kmeans.n_iter_,
    kmeans.labels_[:5],
    sep="\n",
)
Example 2: K-means clustering with a k-means++-like initialization mode (PySpark)
# The Spark ML estimator and model classes must be imported explicitly;
# otherwise `KMeans` would still refer to the scikit-learn class imported
# for Example 1 and `KMeansModel` would be undefined.
from pyspark.ml.clustering import KMeans, KMeansModel
from pyspark.ml.linalg import Vectors

# NOTE(review): `spark` and `temp_path` are not defined in this snippet.
# Presumably `spark` is an active SparkSession and `temp_path` a writable
# directory supplied by the surrounding session -- confirm before running.

# Four 2-D feature vectors: two near the origin and two near (9, 9).
data = [
    (Vectors.dense([0.0, 0.0]),),
    (Vectors.dense([1.0, 1.0]),),
    (Vectors.dense([9.0, 8.0]),),
    (Vectors.dense([8.0, 9.0]),),
]
df = spark.createDataFrame(data, ["features"])

# Fit a 2-cluster model with a fixed seed.
kmeans = KMeans(k=2, seed=1)
model = kmeans.fit(df)

# The bare expressions below are REPL-style inspections; when this runs as a
# plain script their values are evaluated and discarded.
centers = model.clusterCenters()
len(centers)

# NOTE(review): KMeansModel.computeCost was deprecated in Spark 2.4 and
# removed in Spark 3.0; on newer versions model.summary.trainingCost is the
# usual replacement -- confirm against the Spark version in use.
model.computeCost(df)

# Assign each input row to a cluster and pull the results to the driver.
transformed = model.transform(df).select("features", "prediction")
rows = transformed.collect()
# The two points near the origin are expected to share a cluster, as are
# the two points near (9, 9).
rows[0].prediction == rows[1].prediction
rows[2].prediction == rows[3].prediction

# `summary` has to be bound before it is inspected; the original snippet
# used it without ever assigning it.
model.hasSummary
summary = model.summary
summary.k
summary.clusterSizes

# Round-trip the (unfitted) estimator through disk.
kmeans_path = temp_path + "/kmeans"
kmeans.save(kmeans_path)
kmeans2 = KMeans.load(kmeans_path)
kmeans2.getK()

# Round-trip the fitted model through disk and compare its centers with the
# in-memory model's centers.
model_path = temp_path + "/kmeans_model"
model.save(model_path)
model2 = KMeansModel.load(model_path)
model2.hasSummary
model.clusterCenters()[0] == model2.clusterCenters()[0]
# Fixed typo: the method is `clusterCenters`, not `clustersCenters`.
model.clusterCenters()[1] == model2.clusterCenters()[1]