Source code for clusterking.cluster.cluster

#!/usr/bin/env python3

"""Read the results from scan.py and cluster them.
"""

# std
import time
from abc import abstractmethod

# 3rd
import pandas as pd

# us
from clusterking.util.metadata import version_info, nested_dict
from clusterking.util.log import get_logger
from clusterking.worker import DataWorker
from clusterking.result import DataResult


class Cluster(DataWorker):
    """Abstract base class for all clustering workers.

    Concrete clustering algorithms subclass this class and implement
    :meth:`run`; the functionality shared between them lives here.
    """

    def __init__(self):
        """Set up the logger, the cluster placeholder and the metadata.

        Takes no arguments; the data to be clustered is handed to
        :meth:`run` instead.
        """
        super().__init__()

        # NOTE(review): the logger is named "Scanner" although this is the
        # Cluster worker -- possibly a copy-paste leftover; confirm before
        # renaming, since log configuration may rely on the name.
        self.log = get_logger("Scanner")

        # No clustering has been performed yet.
        self.clusters = None

        # Assemble the metadata in a local first, then publish it.
        metadata = nested_dict()
        metadata["git"] = version_info(self.log)
        # Creation time, formatted from UTC (time.gmtime).
        metadata["time"] = time.strftime(
            "%a %d %b %Y %H:%M",
            time.gmtime(),
        )

        #: Metadata
        self.md = metadata

    @abstractmethod
    def run(self, data, **kwargs):
        """Perform the clustering; to be implemented by subclasses.

        Args:
            data: The data to cluster.
            **kwargs: Additional, implementation specific options.

        Returns:
            Implementations should return an array-like object with the
            cluster number. This base implementation returns ``None``.
        """
        return None
# todo: add back n_clusters
class ClusterResult(DataResult):
    """Result of a clustering run: the cluster assignment plus metadata."""

    def __init__(self, data, md, clusters):
        """Store the clustering outcome.

        Args:
            data: Data object the clustering was run on; forwarded to the
                parent class as ``data=data``.
            md: Metadata mapping of the clustering run. It is stored by
                reference and gains an ``"n_clusters"`` entry holding the
                number of distinct values found in ``clusters``.
            clusters: Iterable of (hashable) cluster labels.
        """
        super().__init__(data=data)
        self._md = md
        self._clusters = clusters

        distinct_labels = set(self._clusters)
        self._md["n_clusters"] = len(distinct_labels)

    def get_clusters(self, indexed=False):
        """Return the cluster labels.

        Args:
            indexed: If true, wrap the labels in a :class:`pandas.Series`
                that uses the index of the data's dataframe. Otherwise the
                labels are returned exactly as they were stored.

        Returns:
            The stored cluster labels, or a :class:`pandas.Series` of them.
        """
        if indexed:
            frame_index = self._data.df.index
            return pd.Series(self._clusters, index=frame_index)
        return self._clusters

    def write(self, cluster_column="cluster"):
        """Write results back in the :py:class:`~clusterking.data.Data` object.

        Args:
            cluster_column: Name of the dataframe column that receives the
                cluster labels; the metadata is stored under the same key
                in the ``"cluster"`` section of the data's metadata.
        """
        # NOTE(review): ``self._data`` is presumably set by the parent
        # constructor -- confirm against DataResult.
        target = self._data
        target.df[cluster_column] = self._clusters
        target.md["cluster"][cluster_column] = self._md
        target.rename_clusters(column=cluster_column)