Source code for clusterking.cluster.kmeans_cluster

#!/usr/bin/env python3

# 3rd
import sklearn.cluster

# ours
from clusterking.cluster.cluster import Cluster, ClusterResult
from clusterking.util.metadata import failsafe_serialize, nested_dict

class KmeansClusterResult(ClusterResult):
    """Result object returned by :meth:`KmeansCluster.run`.

    Adds nothing on top of :class:`ClusterResult`; it exists so that results
    of a k-means clustering have their own distinct type.
    """
class KmeansCluster(Cluster):
    """Kmeans clustering
    (`wikipedia <https://en.wikipedia.org/wiki/K-means_clustering>`_) as
    implemented in :mod:`sklearn.cluster`.

    Example:

    .. code-block:: python

        import clusterking as ck
        d = ck.Data("/path/to/data.sql")    # Load some data
        c = ck.cluster.KmeansCluster()      # Init worker class
        c.set_kmeans_options(n_clusters=5)  # Set options for clustering
        r = c.run(d)                        # Perform clustering on data
        r.write()                           # Write results back to data
    """

    def __init__(self):
        """Initialize the worker with no k-means options set."""
        super().__init__()
        #: Keyword arguments forwarded to :class:`sklearn.cluster.KMeans`.
        self._kmeans_kwargs = {}
        # NOTE(review): the attribute name was lost in the extracted source;
        # reconstructed as ``md`` (metadata) -- confirm against ``Cluster``.
        self.md = nested_dict()

    def set_kmeans_options(self, **kwargs) -> None:
        """Configure clustering algorithms.

        Args:
            **kwargs: Keyword arguments to :func:`sklearn.cluster.KMeans`.
        """
        self._kmeans_kwargs = kwargs
        # Keep a serialized copy of the options alongside the live kwargs so
        # that they are recorded in the metadata handed to the result.
        self.md["kmeans"]["kwargs"] = failsafe_serialize(kwargs)

    def run(self, data) -> KmeansClusterResult:
        """Cluster ``data`` with k-means using the configured options.

        Args:
            data: Data object to be clustered.

        Returns:
            :class:`KmeansClusterResult` holding the data object, the
            metadata and the cluster index predicted for every sample.
        """
        kmeans = sklearn.cluster.KMeans(**self._kmeans_kwargs)
        # NOTE(review): the right-hand side was lost in the extracted source;
        # presumably the data object exposes its sample matrix via ``data()``
        # -- confirm against the data class.
        matrix = data.data()
        # The estimator has to be fitted before ``predict`` can be called.
        kmeans.fit(matrix)
        return KmeansClusterResult(
            data=data, md=self.md, clusters=kmeans.predict(matrix)
        )