diff --git a/clusterview/algorithms.py b/clusterview/algorithms.py
new file mode 100644
index 0000000..ed7f897
--- /dev/null
+++ b/clusterview/algorithms.py
@@ -0,0 +1,151 @@
+from .math import Math
+from .points import Point, PointSet
+
+
+class CentroidGrouping:
+    """
+    A storage class used because Points are not hashable (since the x and y
+    can change). This allows us to do better than just dumping the grouping
+    into a dictionary with a long tuple pointing at an array.
+    """
+
+    def __init__(self, centroid, points=None):
+        """
+        Creates a grouping of points around a centroid.
+
+        @param centroid The Point acting as the centroid of the group.
+        @param points An optional list of Points already in the group.
+        @raises ValueError If centroid is not a Point or points is not a list.
+        """
+        # A None default (rather than a literal []) gives every grouping its
+        # own list; a shared default list would leak points between groups.
+        if points is None:
+            points = []
+
+        if not isinstance(centroid, Point):
+            raise ValueError("Centroid must be a Point.")
+
+        if not isinstance(points, list):
+            raise ValueError("Points must be in a list.")
+
+        self.__centroid = centroid
+        # Copy so later add_point calls never mutate the caller's list.
+        self.__points = list(points)
+
+    @property
+    def centroid(self):
+        """The centroid Point this grouping is built around."""
+        return self.__centroid
+
+    @property
+    def points(self):
+        """The list of Points assigned to this grouping."""
+        return self.__points
+
+    def add_point(self, point):
+        """
+        Adds a point.
+
+        @param point The point.
+        @raises ValueError If point is not a Point.
+        """
+        if not isinstance(point, Point):
+            raise ValueError("Point must be of type Point.")
+
+        self.__points.append(point)
+
+    def __eq__(self, other):
+        """
+        Two groupings are equal when their centroids and point lists are.
+        """
+        if not isinstance(other, CentroidGrouping):
+            return NotImplemented
+
+        return (self.centroid == other.centroid and
+                self.points == other.points)
+
+
+class Algorithms:
+    """
+    A static class for handling and containing various computational
+    geometry algorithms.
+    """
+
+    # Since all algorithms rely on a set of centroids it is stored here
+    # statically.
+    __centroids = []
+
+    @classmethod
+    def clear_centroids(cls):
+        """Removes every stored centroid."""
+        cls.__centroids = []
+
+    @classmethod
+    def centroids(cls):
+        """Returns the list of stored centroids."""
+        return cls.__centroids
+
+    @classmethod
+    def set_centroids(cls, centroids):
+        """
+        Appends the given centroids to the stored centroids. Use
+        clear_centroids first to replace them.
+
+        @param cls The class calling the method.
+        @param centroids An iterable of Points to store as centroids.
+        @raises ValueError If any centroid is not a Point.
+        """
+        # Materialize and validate everything up front so that a bad entry
+        # cannot leave the stored centroids partially updated.
+        centroids = list(centroids)
+
+        if not all(isinstance(c, Point) for c in centroids):
+            raise ValueError("Centroids must be of type Point.")
+
+        cls.__centroids.extend(centroids)
+
+    @classmethod
+    def euclidean_grouping(cls, point_set):
+        """
+        Given a point set that EXCLUDES the centroids specified
+        it returns a list of CentroidGroupings, one per centroid and in
+        centroid order, where each grouping holds the points whose smallest
+        euclidean distance is to that centroid. Ties go to the centroid
+        that was stored first.
+
+        @param cls The class calling the method.
+        @param point_set The set of points from the UI.
+        @raises ValueError If point_set is not a PointSet, no centroids are
+                           stored, or no nearest centroid can be found.
+        """
+        if not isinstance(point_set, PointSet):
+            raise ValueError("Euclidean grouping can only be calculated on " +
+                             "PointSet types.")
+
+        if not cls.__centroids:
+            raise ValueError("No centroids specified.")
+
+        groups = [CentroidGrouping(centroid) for centroid in cls.__centroids]
+
+        for point in point_set.points:
+            nearest_distance = float("inf")
+            nearest_group = None
+
+            # groups[i] was built from centroid i, so tracking the group
+            # directly avoids searching for it again afterwards.
+            for group in groups:
+                current_distance = Math.euclidean_distance(group.centroid,
+                                                           point)
+
+                if current_distance < nearest_distance:
+                    nearest_group = group
+                    nearest_distance = current_distance
+
+            # Only reachable when no distance compared below infinity.
+            if nearest_group is None:
+                raise ValueError("Failed to find centroid nearest " +
+                                 f"to point {point}")
+
+            nearest_group.add_point(point)
+
+        return groups
diff --git a/clusterview/colors.py b/clusterview/colors.py
index 216e27c..d8adb9c 100644
--- a/clusterview/colors.py
+++ b/clusterview/colors.py
@@ -5,6 +5,9 @@ class Color(str, Enum):
     BLUE = 'BLUE'
     BLACK = 'BLACK'
     GREY = 'GREY'
+    RED = 'RED'
+    ORANGE = 'ORANGE'
+    PURPLE = 'PURPLE'
 
 
 # A simple map from Color -> RGBA 4-Tuple
@@ -13,5 +16,8 @@ class Color(str, Enum):
 COLOR_TO_RGBA = {
     Color.GREY: (0.827, 0.827, 0.826, 0.0),
     Color.BLUE: (0.118, 0.565, 1.0, 0.0),
-    Color.BLACK: (0.0, 0.0, 0.0, 0.0)
+    Color.BLACK: (0.0, 0.0, 0.0, 0.0),
+    Color.RED: (1.0, 0.0, 0.0, 0.0),
+    Color.ORANGE: (0.98, 0.625, 0.12, 0.0),
+    Color.PURPLE: (0.60, 0.40, 0.70, 0.0)
 }
diff --git a/tests/test_algorithms.py b/tests/test_algorithms.py
new file mode 100644
index 0000000..7f90018
--- /dev/null
+++ b/tests/test_algorithms.py
@@ -0,0 +1,91 @@
+import pytest
+
+from clusterview.algorithms import Algorithms, CentroidGrouping
+from clusterview.colors import Color
+from clusterview.points import Point, PointSet
+
+
+@pytest.fixture(autouse=True, scope="function")
+def teardown():
+    """
+    Teardown function for after each test. The current pytest best practice
+    is to run a setup routine, yield, and then run your teardown routine.
+    """
+    yield
+    Algorithms.clear_centroids()
+
+
+def test_empty_centroids():
+    # A valid PointSet is passed so the error can only come from the
+    # missing centroids, not from the PointSet type check.
+    with pytest.raises(ValueError):
+        Algorithms.euclidean_grouping(PointSet(8, 800, 600))
+
+
+def test_wrong_point_set():
+    centroid_g1 = Point(101, 81, Color.ORANGE, 8, 800, 600)
+    centroid_g2 = Point(357, 222, Color.RED, 8, 800, 600)
+    centroid_g3 = Point(728, 47, Color.PURPLE, 8, 800, 600)
+
+    centroids = [centroid_g1, centroid_g2, centroid_g3]
+
+    Algorithms.set_centroids(centroids)
+
+    with pytest.raises(ValueError):
+        Algorithms.euclidean_grouping(None)
+
+
+def test_euclidean_distance():
+    centroid_g1 = Point(101, 81, Color.ORANGE, 8, 800, 600)
+    centroid_g2 = Point(357, 222, Color.RED, 8, 800, 600)
+    centroid_g3 = Point(728, 47, Color.PURPLE, 8, 800, 600)
+
+    centroids = [centroid_g1, centroid_g2, centroid_g3]
+
+    point1_g1 = Point(67, 59, Color.GREY, 8, 800, 600)
+    point2_g1 = Point(116, 53, Color.GREY, 8, 800, 600)
+    point3_g1 = Point(144, 105, Color.GREY, 8, 800, 600)
+
+    point1_g2 = Point(388, 243, Color.GREY, 8, 800, 600)
+    point2_g2 = Point(358, 248, Color.GREY, 8, 800, 600)
+    point3_g2 = Point(426, 202, Color.GREY, 8, 800, 600)
+
+    point1_g3 = Point(750, 47, Color.GREY, 8, 800, 600)
+    point2_g3 = Point(741, 85, Color.GREY, 8, 800, 600)
+    point3_g3 = Point(700, 72, Color.GREY, 8, 800, 600)
+
+    # This PointSet is the PointSet that excludes the centroids.
+    point_set = PointSet(8, 800, 600)
+    point_set.add_point(67, 59, Color.GREY)
+    point_set.add_point(116, 53, Color.GREY)
+    point_set.add_point(144, 105, Color.GREY)
+
+    point_set.add_point(388, 243, Color.GREY)
+    point_set.add_point(358, 248, Color.GREY)
+    point_set.add_point(426, 202, Color.GREY)
+
+    point_set.add_point(750, 47, Color.GREY)
+    point_set.add_point(741, 85, Color.GREY)
+    point_set.add_point(700, 72, Color.GREY)
+
+    centroid_grouping_1 = CentroidGrouping(centroid_g1,
+                                           [point1_g1, point2_g1, point3_g1])
+
+    centroid_grouping_2 = CentroidGrouping(centroid_g2,
+                                           [point1_g2, point2_g2, point3_g2])
+
+    centroid_grouping_3 = CentroidGrouping(centroid_g3,
+                                           [point1_g3, point2_g3, point3_g3])
+
+    expected = [centroid_grouping_1, centroid_grouping_2, centroid_grouping_3]
+
+    Algorithms.set_centroids(centroids)
+    actual = Algorithms.euclidean_grouping(point_set)
+
+    assert len(actual) == len(expected)
+
+    # Since I don't want to figure out what grouping is where I'll accept
+    # the linearity of `in`.
+    assert centroid_grouping_1 in actual
+    assert centroid_grouping_2 in actual
+    assert centroid_grouping_3 in actual