diff --git a/kmeans/algorithms.py b/kmeans/algorithms.py index dfe89a2..0702ea7 100644 --- a/kmeans/algorithms.py +++ b/kmeans/algorithms.py @@ -26,9 +26,13 @@ def mean_movement(clusters: List[Cluster]) -> float: return highest_movement -def unweighted_k_means(points: List[Point], k: int, d: float = 0.001) -> List[Cluster]: +def k_means(points: List[Point], k: int, d: float = 0.001) -> List[Cluster]: """ - Runs Lloyd's Algorithm for k-means clustering without weights. + Runs Lloyd's Algorithm for k-means clustering. + + If no weights are added (that is, all point weights are 1) the mean is + found using the arithmetic mean. If there are weights, the mean will be + a weighted mean of the points. @param points The list of points to cluster. @param k The number of clusters. @@ -75,13 +79,19 @@ def unweighted_k_means(points: List[Point], k: int, d: float = 0.001) -> List[Cl for cluster in clusters: # Update the mean with the new points if cluster.points is not None: - xs = [p.x for p in cluster.points] - ys = [p.y for p in cluster.points] + # When all weights are 1 the sum of these lists will be + # the exact same thing as the standard arithmetic mean. + xs = [p.x * p.weight for p in cluster.points] + ys = [p.y * p.weight for p in cluster.points] + + # When all weights are 1, the sum of this list is + # exactly the length of the list. + weights = [p.weight for p in cluster.points] # Averaging the xs and ys will give us the mean point # of our cluster. - new_x = sum(xs) / len(xs) - new_y = sum(ys) / len(ys) + new_x = sum(xs) / sum(weights) + new_y = sum(ys) / sum(weights) new_mean = Point(new_x, new_y)