Source code for clusterking.cluster.cluster
#!/usr/bin/env python3
"""Read the results from scan.py and cluster them.
"""
# std
import time
from abc import abstractmethod
# 3rd
import pandas as pd
# us
from clusterking.util.metadata import version_info, nested_dict
from clusterking.util.log import get_logger
from clusterking.worker import DataWorker
from clusterking.result import DataResult
class Cluster(DataWorker):
    """Abstract base class for the clustering workers.

    Concrete clustering algorithms subclass this class and implement
    :meth:`run`; the bookkeeping shared by all of them (logger, metadata)
    is set up here.
    """

    def __init__(self):
        """Set up the logger, the cluster placeholder and the metadata.

        Takes no arguments; the data to be clustered is handed to
        :meth:`run` instead.
        """
        super().__init__()
        # NOTE(review): the logger is named "Scanner" although this is the
        # clustering worker -- possibly carried over from the scan module;
        # confirm before renaming, since log filters may rely on the name.
        self.log = get_logger("Scanner")

        # Starts out as None; nothing in this base class assigns it later.
        self.clusters = None

        #: Metadata
        self.md = nested_dict()
        self.md["git"] = version_info(self.log)
        # Creation timestamp in UTC with minute resolution.
        utc_now = time.gmtime()
        self.md["time"] = time.strftime("%a %d %b %Y %H:%M", utc_now)

    @abstractmethod
    def run(self, data, **kwargs):
        """Perform the clustering; must be implemented by subclasses.

        Args:
            data: The data to cluster (presumably a
                :py:class:`~clusterking.data.Data` object -- TODO confirm
                against the callers).
            **kwargs: Implementation-specific options.

        Returns:
            Implementations should return an array-like object with the
            cluster number.
        """
# todo: add back n_clusters
class ClusterResult(DataResult):
    """Result of a clustering run: cluster labels plus their metadata."""

    def __init__(self, data, md, clusters):
        """Store the clustering outcome and record the number of clusters.

        Args:
            data: Data object, forwarded to the ``DataResult`` constructor.
            md: Metadata mapping of the clustering run. It is modified in
                place: the key ``"n_clusters"`` is set to the number of
                distinct values in ``clusters``.
            clusters: Iterable of hashable cluster labels.
        """
        super().__init__(data=data)
        self._md = md
        self._clusters = clusters
        # Count each distinct label once.
        distinct_labels = set(clusters)
        md["n_clusters"] = len(distinct_labels)

    def get_clusters(self, indexed=False):
        """Return the cluster labels.

        Args:
            indexed: If true, wrap the labels in a :class:`pandas.Series`
                that carries the index of the data's dataframe. Otherwise
                return the stored labels object unchanged.

        Returns:
            The stored labels, or a :class:`pandas.Series` of them.
        """
        if indexed:
            return pd.Series(self._clusters, index=self._data.df.index)
        return self._clusters

    def write(self, cluster_column="cluster"):
        """Write results back in the :py:class:`~clusterking.data.Data`
        object.

        Args:
            cluster_column: Name of the dataframe column that receives the
                cluster labels. The same name is used as the key under
                which the metadata is stored in ``md["cluster"]``.
        """
        target = self._data
        target.df[cluster_column] = self._clusters
        target.md["cluster"][cluster_column] = self._md
        # Let the data object post-process the freshly written column.
        target.rename_clusters(column=cluster_column)