Source code for clusterking.stability.stabilitytester

#!/usr/bin/env python3

# std
from abc import abstractmethod
from typing import Union
from pathlib import Path, PurePath

# 3rd
import pandas as pd

# ours
from clusterking.worker import AbstractWorker
from clusterking.result import AbstractResult
from clusterking.stability.fom import FOM


[docs]class StabilityTesterResult(AbstractResult): """Result of a :class:`AbstractStabilityTester`"""
[docs]class SimpleStabilityTesterResult(AbstractResult):
[docs] def __init__(self, df: pd.DataFrame): super().__init__() self.df = df
[docs] def write(self, path: Union[str, PurePath]) -> None: """Save to file.""" path = Path(path) path.parent.mkdir(parents=True, exist_ok=True) self.df.to_csv(path)
[docs] @classmethod def load(cls, path: Union[str, PurePath]) -> "SimpleStabilityTesterResult": """Load :class:`SimpleStabilityTesterResult` from file. Args: path: Path to result file Returns: :class:`SimpleStabilityTesterResult` object Example: sstr = SimpleStabilityTesterResult.load("path/to/file") """ return SimpleStabilityTesterResult(df=pd.read_csv(Path(path)))
[docs]class AbstractStabilityTester(AbstractWorker): """Abstract baseclass to perform stability tests. This baseclass is a subclass of :class:`clusterking.worker.AbstractWorker` and thereby adheres to the Command design pattern: After initialization, several methods can be called to modify internal settings. Finally, the :meth:`run` method is called to perform the actual test. All current stability tests perform the task at hand (clustering, benchmarking, etc.) for multiple, slightly varied datasets or worker parameters (these runs are called 'experiments'). For each of these (for each experiment), figures of merit (FOMs) are calculated that compare the outcome with the original outcome (e.g. how many points still lie in the same cluster, or how far the benchmark points are diverging). These FOMs are then written out to a :class:`StabilityTesterResult` object, which provides methods for visualization and further analyses (e.g. histograms, etc.). """
[docs] def __init__(self, exceptions="raise"): """Initialize :class:`AbstractStabilityTester` Args: exceptions: When calculating the FOM, what should we do if an exception arises. 'raise': Raise exception, 'print': Return None and print exception information. """ super().__init__() self._foms = {} self._exceptions_handling = exceptions
[docs] def add_fom(self, fom: FOM) -> None: """Add a figure of merit (FOM). Args: fom: :class:`~clusterking.stability.fom.FOM` object Returns: None """ if fom.name in self._foms: # todo: do with log print( "Warning: FOM with name {} already existed. Replacing.".format( fom.name ) ) self._foms[fom.name] = fom
[docs] @abstractmethod def run(self, *args, **kwargs) -> StabilityTesterResult: """Run the stability test. Args: *args: Positional arguments **kwargs: Key word arguments Returns: :class:`~StabilityTesterResult` object """ pass