commit 9889a51e1326244d5d7745e877265f9b142b4b56 Author: Taylor Bockman Date: Wed Oct 16 13:41:05 2019 -0700 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f717486 --- /dev/null +++ b/.gitignore @@ -0,0 +1,125 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..393edcd
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+# K-Means Example
+
+An example project using the `kmeans` library.
+
+![](example.png)
diff --git a/example.png b/example.png
new file mode 100644
index 0000000..18118e4
Binary files /dev/null and b/example.png differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..1cd0c6d
--- /dev/null
+++ b/main.py
@@ -0,0 +1,130 @@
+import getopt
+import random
+import sys
+from typing import List
+
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.spatial import ConvexHull
+from scipy.spatial.qhull import QhullError
+
+from kmeans.algorithms import unweighted_k_means
+from kmeans.clustering.cluster import Cluster
+from kmeans.clustering.point import Point
+
+
+def generate_points(x_bound: int, y_bound: int, count: int) -> List[Point]:
+    """
+    Generates random points bounded by (x_bound, y_bound).
+
+    The x and y coordinates are each sampled without replacement, so no two
+    generated points share an x value or a y value.
+
+    @param x_bound The x direction boundary (exclusive).
+    @param y_bound The y direction boundary (exclusive).
+    @param count The count of points.
+    @return A list of `count` points.
+    @raise ValueError If count is negative or exceeds the size of either range.
+    """
+    xs = random.sample(range(x_bound), count)
+    ys = random.sample(range(y_bound), count)
+
+    return [Point(x, y) for x, y in zip(xs, ys)]
+
+
+def main():
+    """
+    Plots random points, then their k-means clusters and convex hulls.
+
+    Required options: -x/--x and -y/--y give the exclusive coordinate bounds
+    and -r/--random gives the number of points to generate.
+    """
+    try:
+        opts, _ = getopt.getopt(sys.argv[1:], "x:y:r:",
+                                ['x=', 'y=', 'random='])
+    except getopt.GetoptError:
+        print('Option not recognized')
+        sys.exit(-1)
+
+    x = None
+    y = None
+    # Named `count` so the imported random module is not shadowed.
+    count = None
+
+    try:
+        for o, a in opts:
+            if o in ('-x', '--x'):
+                x = int(a)
+            elif o in ('-y', '--y'):
+                y = int(a)
+            elif o in ('-r', '--random'):
+                count = int(a)
+            else:
+                print(f'Unknown option {o}\n')
+                sys.exit(-1)
+    except ValueError:
+        print('x, y, and r must be integers\n')
+        sys.exit(-1)
+
+    if x is None or y is None or count is None:
+        print('x, y, and r must be specified\n')
+        sys.exit(-1)
+
+    try:
+        points = generate_points(x, y, count)
+    except ValueError:
+        # random.sample cannot draw more unique values than a range holds.
+        print('r must be non-negative and no larger than x or y\n')
+        sys.exit(-1)
+
+    print('--- INITIAL POINT PLOT ---')
+    xs = [p.x for p in points]
+    ys = [p.y for p in points]
+    plt.plot(xs, ys, 'o')
+    plt.show()
+
+    clusters = unweighted_k_means(points, 4, 0.001)
+
+    # Color clusters
+    assigned_colors = plt.cm.gist_ncar(np.linspace(0, 1, 4))
+
+    for i, cluster in enumerate(clusters):
+        cluster.color = assigned_colors[i]
+
+    print('--- CLUSTER PLOT ---')
+    for cluster in clusters:
+        xs = [p.x for p in cluster.points]
+        ys = [p.y for p in cluster.points]
+        # The color must be a keyword argument: a fourth positional argument
+        # is treated by plot() as another data series.
+        plt.plot(xs, ys, 'o', color=cluster.color)
+
+    plt.show()
+
+    print('--- CLUSTER PLOT WITH CONVEX HULL BOUNDARIES ---')
+    for cluster in clusters:
+        xs = [p.x for p in cluster.points]
+        ys = [p.y for p in cluster.points]
+        plt.plot(xs, ys, 'o', color=cluster.color)
+
+        # Convex hull plot
+        if len(cluster.points) >= 4:
+            coords = np.array([p.array() for p in cluster.points])
+
+            try:
+                hull = ConvexHull(coords)
+            except QhullError as e:
+                print(str(e))
+                continue
+
+            plt.plot(coords[hull.vertices, 0],
+                     coords[hull.vertices, 1], 'r--', lw=2)
+
+            plt.plot(coords[hull.vertices[0], 0],
+                     coords[hull.vertices[0], 1], 'ro')
+
+    plt.show()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt
b/requirements.txt new file mode 100644 index 0000000..b912431 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,28 @@ +asn1crypto==0.24.0 +autopep8==1.4.4 +behave==1.2.6 +certifi==2019.9.11 +cffi==1.12.3 +chardet==3.0.4 +cryptography==2.7 +cycler==0.10.0 +Cython==0.29.13 +decorator==4.4.0 +entrypoints==0.3 +flake8==3.7.8 +kiwisolver==1.1.0 +matplotlib==3.1.1 +mccabe==0.6.1 +numpy==1.17.2 +parse==1.12.1 +parse-type==0.5.2 +pycodestyle==2.5.0 +pycparser==2.19 +pyflakes==2.1.1 +pyparsing==2.4.2 +python-dateutil==2.8.0 +scipy==1.3.1 +six==1.12.0 + + +-e git+git://git.xchg.sh/angrygoats/kmeans.git@master#egg=kmeans diff --git a/run_sample.sh b/run_sample.sh new file mode 100644 index 0000000..5a46272 --- /dev/null +++ b/run_sample.sh @@ -0,0 +1 @@ +python main.py -x 100 -y 100 -r 50