commit
					7ff69a83af
				
				 9 changed files with 378 additions and 0 deletions
			
			
		@ -0,0 +1,125 @@
					 | 
				
			||||
# Byte-compiled / optimized / DLL files | 
				
			||||
__pycache__/ | 
				
			||||
*.py[cod] | 
				
			||||
*$py.class | 
				
			||||
 | 
				
			||||
# C extensions | 
				
			||||
*.so | 
				
			||||
 | 
				
			||||
# Distribution / packaging | 
				
			||||
.Python | 
				
			||||
build/ | 
				
			||||
develop-eggs/ | 
				
			||||
dist/ | 
				
			||||
downloads/ | 
				
			||||
eggs/ | 
				
			||||
.eggs/ | 
				
			||||
lib/ | 
				
			||||
lib64/ | 
				
			||||
parts/ | 
				
			||||
sdist/ | 
				
			||||
var/ | 
				
			||||
wheels/ | 
				
			||||
pip-wheel-metadata/ | 
				
			||||
share/python-wheels/ | 
				
			||||
*.egg-info/ | 
				
			||||
.installed.cfg | 
				
			||||
*.egg | 
				
			||||
MANIFEST | 
				
			||||
 | 
				
			||||
# PyInstaller | 
				
			||||
#  Usually these files are written by a python script from a template | 
				
			||||
#  before PyInstaller builds the exe, so as to inject date/other infos into it. | 
				
			||||
*.manifest | 
				
			||||
*.spec | 
				
			||||
 | 
				
			||||
# Installer logs | 
				
			||||
pip-log.txt | 
				
			||||
pip-delete-this-directory.txt | 
				
			||||
 | 
				
			||||
# Unit test / coverage reports | 
				
			||||
htmlcov/ | 
				
			||||
.tox/ | 
				
			||||
.nox/ | 
				
			||||
.coverage | 
				
			||||
.coverage.* | 
				
			||||
.cache | 
				
			||||
nosetests.xml | 
				
			||||
coverage.xml | 
				
			||||
*.cover | 
				
			||||
*.py,cover | 
				
			||||
.hypothesis/ | 
				
			||||
.pytest_cache/ | 
				
			||||
 | 
				
			||||
# Translations | 
				
			||||
*.mo | 
				
			||||
*.pot | 
				
			||||
 | 
				
			||||
# Django stuff: | 
				
			||||
*.log | 
				
			||||
local_settings.py | 
				
			||||
db.sqlite3 | 
				
			||||
db.sqlite3-journal | 
				
			||||
 | 
				
			||||
# Flask stuff: | 
				
			||||
instance/ | 
				
			||||
.webassets-cache | 
				
			||||
 | 
				
			||||
# Scrapy stuff: | 
				
			||||
.scrapy | 
				
			||||
 | 
				
			||||
# Sphinx documentation | 
				
			||||
docs/_build/ | 
				
			||||
 | 
				
			||||
# PyBuilder | 
				
			||||
target/ | 
				
			||||
 | 
				
			||||
# Jupyter Notebook | 
				
			||||
.ipynb_checkpoints | 
				
			||||
 | 
				
			||||
# IPython | 
				
			||||
profile_default/ | 
				
			||||
ipython_config.py | 
				
			||||
 | 
				
			||||
# pyenv | 
				
			||||
.python-version | 
				
			||||
 | 
				
			||||
# pipenv | 
				
			||||
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | 
				
			||||
#   However, in case of collaboration, if having platform-specific dependencies or dependencies | 
				
			||||
#   having no cross-platform support, pipenv may install dependencies that don't work, or not | 
				
			||||
#   install all needed dependencies. | 
				
			||||
#Pipfile.lock | 
				
			||||
 | 
				
			||||
# celery beat schedule file | 
				
			||||
celerybeat-schedule | 
				
			||||
 | 
				
			||||
# SageMath parsed files | 
				
			||||
*.sage.py | 
				
			||||
 | 
				
			||||
# Environments | 
				
			||||
.env | 
				
			||||
.venv | 
				
			||||
env/ | 
				
			||||
venv/ | 
				
			||||
ENV/ | 
				
			||||
env.bak/ | 
				
			||||
venv.bak/ | 
				
			||||
 | 
				
			||||
# Spyder project settings | 
				
			||||
.spyderproject | 
				
			||||
.spyproject | 
				
			||||
 | 
				
			||||
# Rope project settings | 
				
			||||
.ropeproject | 
				
			||||
 | 
				
			||||
# mkdocs documentation | 
				
			||||
/site | 
				
			||||
 | 
				
			||||
# mypy | 
				
			||||
.mypy_cache/ | 
				
			||||
.dmypy.json | 
				
			||||
dmypy.json | 
				
			||||
 | 
				
			||||
# Pyre type checker | 
				
			||||
.pyre/ | 
				
			||||
@ -0,0 +1,3 @@
					 | 
				
			||||
# K-Means | 
				
			||||
 | 
				
			||||
Implementations of Unweighted and Weighted K-Means. | 
				
			||||
@ -0,0 +1,92 @@
					 | 
				
			||||
import random | 
				
			||||
import sys | 
				
			||||
from typing import List | 
				
			||||
 | 
				
			||||
from kmeans.clustering.cluster import Cluster | 
				
			||||
from kmeans.clustering.point import Point | 
				
			||||
from kmeans.clustering.geometry import dist | 
				
			||||
 | 
				
			||||
 | 
				
			||||
def mean_movement(clusters: List[Cluster]) -> float:
    """
    Determines the largest mean movement among the given clusters.

    Each cluster reports, through `mean_moved()`, how far its mean travelled
    during its most recent update. This function returns the largest of
    those distances.

    @param clusters The clusters to inspect.
    @return The largest movement reported by any cluster, or 0 when
            `clusters` is empty or no cluster reports a movement above 0.
    """

    highest_movement = 0

    for cluster in clusters:
        # Ask each cluster only once; every call recomputes a distance.
        movement = cluster.mean_moved()

        if movement > highest_movement:
            highest_movement = movement

    return highest_movement
				
			||||
 | 
				
			||||
 | 
				
			||||
def unweighted_k_means(points: List[Point], k: int, d: float = 0.001) -> List[Cluster]:
    """
    Runs Lloyd's Algorithm for k-means clustering without weights.

    Points are repeatedly assigned to the cluster with the nearest mean and
    every mean is then recomputed as the average of its member points. The
    loop stops once no mean moved farther than `d` in a single iteration.

    @param points The list of points to cluster.
    @param k The number of clusters.
    @param d The threshold distance where we will consider the means converged.
    @return The list of clusters, each holding the points assigned to it.
    @raise ValueError If `random.sample` rejects `k` for this population, or
                      if a point could not be assigned to any cluster.
    """

    # Each mean starts out as one of the input points, chosen at random.
    # Later iterations replace it with a freshly computed Point.
    clusters = [Cluster(mean, [], None) for mean in random.sample(points, k)]

    mean_needs_moving = True

    # Refer to the mean_movement function. It simply returns the largest
    # mean movement of all clusters. Once the movement of all clusters is
    # lower than `d` we can consider the series converged and return.
    while mean_needs_moving:

        # Compare each point to every cluster's centroid and calculate the
        # distance to it. Assign to the nearest cluster.
        for point in points:
            best_dist = float('inf')
            best_cluster = None

            for cluster in clusters:
                # Deliberately not named `d`: that would overwrite the
                # convergence threshold passed in by the caller.
                distance = dist(point, cluster.mean)

                if distance < best_dist:
                    best_dist = distance
                    best_cluster = cluster

            if best_cluster is None:
                raise ValueError(
                    f'Failed to assign cluster to point {point}')

            # Nothing to do when the point already sits in its nearest cluster.
            if point.cluster is best_cluster:
                continue

            if point.cluster is not None:
                point.cluster.remove_point(point)

            best_cluster.add_point(point)

        for cluster in clusters:
            members = cluster.points

            if members:
                # Averaging the xs and ys will give us the mean point
                # of our cluster.
                new_x = sum(p.x for p in members) / len(members)
                new_y = sum(p.y for p in members) / len(members)

                cluster.update_mean(Point(new_x, new_y))
            else:
                # An empty cluster has nothing to average. Re-record its
                # current mean so it reports no movement for this iteration
                # instead of dividing by zero.
                cluster.update_mean(cluster.mean)

        mean_needs_moving = mean_movement(clusters) > d

    return clusters
				
			||||
@ -0,0 +1,72 @@
					 | 
				
			||||
from typing import List | 
				
			||||
 | 
				
			||||
import numpy as np | 
				
			||||
 | 
				
			||||
from .geometry import dist | 
				
			||||
from .point import Point | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class Cluster:
    """
    Represents a cluster of points.

    A cluster keeps its member points, its current mean, the mean it had
    before the most recent call to `update_mean`, and a color.
    """

    def __init__(self, mean: Point, points: List[Point], color: List[float]):
        """
        @param mean The initial mean of the cluster.
        @param points The initial member points. The list is stored as given
                      (not copied) and is mutated by `add_point` and
                      `remove_point`.
        @param color The color of the cluster. It is stored without
                     validation.
        """
        self._points = points
        self._children = None
        self._color = color
        self._mean = mean
        # There is no previous mean until update_mean is called.
        self._last_mean = None

    @property
    def points(self):
        """The list of points currently held by the cluster."""
        return self._points

    @property
    def color(self):
        """The color of the cluster."""
        return self._color

    @property
    def mean(self):
        """The current mean of the cluster."""
        return self._mean

    @property
    def last_mean(self):
        """The mean before the latest update, or None if never updated."""
        return self._last_mean

    def update_mean(self, mean: Point):
        """
        Replaces the current mean, remembering the old one as `last_mean`.

        @param mean The new mean of the cluster.
        """
        self._last_mean = self._mean
        self._mean = mean

    def mean_moved(self) -> float:
        """
        Returns the distance between the current mean and the previous one.

        @return The distance moved by the latest `update_mean` call, or 0.0
                when the mean has never been updated.
        """
        if self._last_mean is None:
            # Without a previous mean there is nothing to measure against.
            return 0.0

        return dist(self._mean, self._last_mean)

    @color.setter
    def color(self, color: List[float]):
        self._color = color

    def add_point(self, point: Point):
        """
        Adds a point to the cluster and points its `cluster` reference here.

        @param point The point to add.
        """
        self._points.append(point)
        point.cluster = self

    def remove_point(self, point: Point):
        """
        Removes a point from the cluster.

        @param point The point to remove.
        @raise ValueError If the point is not in the cluster.
        """
        self._points.remove(point)

        # Do not leave the point referring to a cluster that dropped it.
        if point.cluster is self:
            point.cluster = None

    def __repr__(self):
        point_pairs = [(p.x, p.y) for p in self._points]
        string = f'POINTS: {point_pairs}\n'

        return string

    def __eq__(self, other):
        """
        Two clusters are equal when they hold the same points, regardless of
        order. Comparing against anything that is not a Cluster returns
        NotImplemented so Python can fall back to its default handling.
        """
        if not isinstance(other, Cluster):
            return NotImplemented

        # Cross off every one of our points from a copy of the other
        # cluster's points. Both clusters must run out at the same time,
        # which keeps the comparison symmetric and duplicate-aware.
        unmatched = list(other.points)

        for point in self._points:
            try:
                unmatched.remove(point)
            except ValueError:
                return False

        return not unmatched
				
			||||
@ -0,0 +1,11 @@
					 | 
				
			||||
import numpy as np | 
				
			||||
 | 
				
			||||
from .point import Point | 
				
			||||
 | 
				
			||||
 | 
				
			||||
def dist(p: Point, q: Point) -> float:
    """
    Returns the Euclidean distance between the points `p` and `q`.
    """
    delta_x = q.x - p.x
    delta_y = q.y - p.y

    return np.sqrt(delta_x ** 2 + delta_y ** 2)
				
			||||
@ -0,0 +1,56 @@
					 | 
				
			||||
class Point:
    """
    A two dimensional point with a weight and a reference to the cluster
    it is currently assigned to.
    """

    def __init__(self, x: float, y: float, weight: float = 1.0):
        """
        @param x The x coordinate.
        @param y The y coordinate.
        @param weight The weight of the point.
        """
        self._x = x
        self._y = y
        # A new point does not belong to any cluster yet.
        self._cluster = None
        self._weight = weight

    @property
    def cluster(self):
        """The cluster the point is assigned to, or None."""
        return self._cluster

    @cluster.setter
    def cluster(self, cluster):
        self._cluster = cluster

    @property
    def x(self):
        """The x coordinate."""
        return self._x

    @property
    def y(self):
        """The y coordinate."""
        return self._y

    @property
    def weight(self):
        """The weight of the point."""
        return self._weight

    @x.setter
    def x(self, x):
        self._x = x

    @y.setter
    def y(self, y):
        # A property setter must accept the assigned value as a parameter.
        self._y = y

    @weight.setter
    def weight(self, weight):
        """
        @raise ValueError If `weight` is not a float.
        """

        if not isinstance(weight, float):
            raise ValueError('Weight must be a float')

        self._weight = weight

    def array(self):
        """
        Returns array representation for use in convex hull.
        """
        return [self._x, self._y]

    def __eq__(self, other):
        """
        Two points are equal when their coordinates match. Comparing against
        anything that is not a Point returns NotImplemented so Python can
        fall back to its default handling.
        """
        if not isinstance(other, Point):
            return NotImplemented

        return self._x == other._x and self._y == other._y
				
			||||
@ -0,0 +1,19 @@
					 | 
				
			||||
from setuptools import setup, find_packages | 
				
			||||
 | 
				
			||||
# Read the README as UTF-8 explicitly. Without an encoding, open() falls
# back to a platform dependent default that may fail to decode the file.
with open('README.md', 'r', encoding='utf-8') as fh:
    long_description = fh.read()

# Package metadata; the README doubles as the long description.
setup(
    name='kmeans',
    version='1.0.0',
    author='Taylor Bockman',
    author_email='tbockman@xchg.sh',
    description='Unweighted and Weighted K-Means implementation.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://git.xchg.sh/angrygoats/kmeans',
    classifiers=[
    ],
    python_requires='>=3.6',
    packages=find_packages(),
)
				
			||||
					Loading…
					
					
				
		Reference in new issue