Spaces:
Sleeping
Sleeping
File size: 1,300 Bytes
3cab2dd 932b3cb 3cab2dd 932b3cb 3cab2dd fcc4124 3cab2dd fcc4124 3cab2dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from dataclasses import dataclass
import numpy as np
from cluster.distance import euclidean
from cluster.clusterer import Clusterer
@dataclass
class Kmeans(Clusterer):
    """K-means clusterer that alternates assignment and centroid updates.

    Attributes:
        k: Number of clusters (and therefore centroids) to fit.
        max_iter: Upper bound on the number of centroid-update rounds that
            :meth:`build` performs.

    After :meth:`build` has run, the fitted state is available as
    ``centroids_`` (one row per cluster) and ``labels_`` (one cluster index
    per row of the input).
    """

    k: int
    max_iter: int

    def build(
        self,
        X: np.ndarray,
    ):
        """Fit ``k`` centroids to ``X`` and record the resulting assignment.

        Args:
            X: 2-D array with one sample per row.

        Returns:
            1-D integer array holding, for every row of ``X``, the index of
            its closest centroid. The same array is stored as ``labels_``
            and the matching centroids as ``centroids_``.

        Raises:
            ValueError: Propagated from ``np.random.choice`` when ``k``
                exceeds the number of rows in ``X`` (sampling is done
                without replacement).
        """
        # Randomly pick k distinct rows of X as the starting centroids.
        seed_rows = np.random.choice(
            X.shape[0],
            self.k,
            replace=False,
        )
        centroids = X[seed_rows]

        # Initial assignment: each point goes to its closest centroid. This
        # happens even when max_iter is 0, so a labelling always exists.
        clusters = self.assign_clusters(X, centroids)

        for _ in range(self.max_iter):
            # Pass the current centroids so an emptied cluster keeps its
            # position instead of collapsing to NaN.
            centroids = self.update_centroids(
                self.k,
                X,
                clusters,
                centroids,
            )
            reassigned = self.assign_clusters(X, centroids)
            # No point changed cluster: further rounds would be no-ops.
            if np.array_equal(reassigned, clusters):
                break
            clusters = reassigned

        self.centroids_ = centroids
        self.labels_ = clusters
        return clusters

    @staticmethod
    def assign_clusters(
        X: np.ndarray,
        centroids: np.ndarray,
    ) -> np.ndarray:
        """Return the index of the closest centroid for every row of ``X``.

        Args:
            X: 2-D array of samples, one per row.
            centroids: 2-D array of centroids, one per row, with the same
                number of columns as ``X``.

        Returns:
            1-D integer array with one entry per row of ``X``. Ties go to
            the lowest centroid index (``np.argmin`` semantics).
        """
        # (k, 1, d) against (n, d) broadcasts to (k, n, d) differences.
        deltas = X - centroids[:, np.newaxis]
        # Euclidean distance from every centroid (axis 0) to every point.
        distances = np.sqrt((deltas ** 2).sum(axis=2))
        return np.argmin(distances, axis=0)

    @staticmethod
    def update_centroids(
        k: int,
        X: np.ndarray,
        clusters: np.ndarray,
        previous: "np.ndarray | None" = None,
    ) -> np.ndarray:
        """Recompute each centroid as the mean of its assigned points.

        Args:
            k: Number of clusters.
            X: 2-D array of samples, one per row.
            clusters: 1-D array giving the cluster index of each row of
                ``X``.
            previous: Optional array of current centroids, one row per
                cluster. When given, a cluster with no assigned points
                keeps its row from ``previous``.

        Returns:
            Float array with ``k`` rows and ``X.shape[1]`` columns. A
            cluster with no members and no ``previous`` row is left as NaN.
        """
        # Start from NaN so an empty cluster without a fallback is visibly
        # undefined rather than silently placed at the origin.
        centroids = np.full((k, X.shape[1]), np.nan)
        for i in range(k):
            members = X[clusters == i]
            if len(members):
                centroids[i] = members.mean(axis=0)
            elif previous is not None:
                centroids[i] = previous[i]
        return centroids

    def label(self):
        """Placeholder; not implemented yet.

        NOTE(review): the original declaration had no ``self`` parameter, so
        calling it on an instance raised ``TypeError``. The intended
        behaviour is not visible in this file -- presumably it should expose
        the labels computed by :meth:`build`; confirm against ``Clusterer``.
        """
        ...

    def main(self):
        """Return the result of ``self.from_dict()``.

        NOTE(review): ``from_dict`` is not defined in this class; presumably
        it is inherited from ``Clusterer`` -- confirm its signature.
        """
        return self.from_dict()
|