From d0789e4f425f5c6df439929a33e76dee291e2dbc Mon Sep 17 00:00:00 2001 From: Taylor Bockman Date: Wed, 16 Oct 2019 19:47:55 -0700 Subject: [PATCH] Update --- kmeans/algorithms.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/kmeans/algorithms.py b/kmeans/algorithms.py index dfe89a2..0702ea7 100644 --- a/kmeans/algorithms.py +++ b/kmeans/algorithms.py @@ -26,9 +26,13 @@ def mean_movement(clusters: List[Cluster]) -> float: return highest_movement -def unweighted_k_means(points: List[Point], k: int, d: float = 0.001) -> List[Cluster]: +def k_means(points: List[Point], k: int, d: float = 0.001) -> List[Cluster]: """ - Runs Lloyd's Algorithm for k-means clustering without weights. + Runs Lloyd's Algorithm for k-means clustering. + + If no weights are added (that is, all point weights are 1) the mean is + found using the arithmetic mean. If there are weights, the mean will be + a weighted mean of the points. @param points The list of points to cluster. @param k The number of clusters. @@ -75,13 +79,19 @@ def unweighted_k_means(points: List[Point], k: int, d: float = 0.001) -> List[Cl for cluster in clusters: # Update the mean with the new points if cluster.points is not None: - xs = [p.x for p in cluster.points] - ys = [p.y for p in cluster.points] + # When all weights are 1 the sum of these lists will be + # the exact same thing as the standard arithmetic mean. + xs = [p.x * p.weight for p in cluster.points] + ys = [p.y * p.weight for p in cluster.points] + + # When all weights are 1, the sum of this list is + # exactly the length of the list. + weights = [p.weight for p in cluster.points] # Averaging the xs and ys will give us the mean point # of our cluster. - new_x = sum(xs) / len(xs) - new_y = sum(ys) / len(ys) + new_x = sum(xs) / sum(weights) + new_y = sum(ys) / sum(weights) new_mean = Point(new_x, new_y)