Source code for clusterking.data.dwe

#!/usr/bin/env python3

# 3rd
import numpy as np

# ours
from clusterking.data.data import Data
from clusterking.maths.statistics import cov2err, cov2corr, abs2rel_cov, \
     ensure_array, corr2cov


[docs]class DataWithErrors(Data):
    """ This class extends the ``Data`` class by convenient and performant ways
    to add errors to the distributions.

    See the description of the ``Data`` class for more information about the
    data structure itself.

    There are three basic ways to add errors:
    1. Add relative errors (with correlation) relative to the bin content of each bin in the distribution: ``add_rel_err_...``
    2. Add absolute errors (with correlation): ``add_err_...``
    3. Add poisson errors: ``add_err_poisson``

    .. note::
        All of these methods, add the errors in a consistent way for all sample
        points/distributions, i.e. it is impossible to add a certain error
        specifically to one sample point only!

    Afterwards, you can get errors, correlation and covariance matrices for
    every data point by using one of the methods such as
    ``cov``, ``corr``, ``err``.

    .. note::
        When saving your dataset, your error configuration is saved as well,
        so you can reload it like any other ``Data`` object.

    Args:
        data: n x nbins matrix
    """
[docs]    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Initialize some values to their default
        self.rel_cov = None
        self.abs_cov = None
        self.poisson_errors = 0

    # **************************************************************************
    # A: Additional shortcuts
    # **************************************************************************

    @property
    def rel_cov(self):
        value = self.md["errors"]["rel_cov"]
        if value is None:
            if self.nbins:
                return np.zeros((self.nbins, self.nbins))
            else:
                return None
        return np.array(value)

    @rel_cov.setter
    def rel_cov(self, value):
        if isinstance(value, np.ndarray):
            # convert to list in order to serialize
            value = value.tolist()
        self.md["errors"]["rel_cov"] = value

    @property
    def abs_cov(self):
        value = self.md["errors"]["abs_cov"]
        if value is None:
            if self.nbins:
                return np.zeros((self.nbins, self.nbins))
            else:
                return None
        return np.array(value)

    @abs_cov.setter
    def abs_cov(self, value):
        if isinstance(value, np.ndarray):
            # convert to list in order to serialize
            value = value.tolist()
        self.md["errors"]["abs_cov"] = value

    @property
    def poisson_errors(self):
        return self.md["errors"]["poisson"]

    @poisson_errors.setter
    def poisson_errors(self, value):
        self.md["errors"]["poisson"] = value

    # **************************************************************************
    # B: Helper functions
    # **************************************************************************

    def _interpret_input(self, inpt, what: str) -> np.ndarray:
        """ Interpret user input

        Args:
            inpt: User input
            what: 'err', 'corr', 'cov'

        Returns:
            correctly shaped numpy array
        """
        inpt = ensure_array(inpt)
        if what.lower() in ["err"]:
            if inpt.ndim == 0:
                return np.tile(inpt, self.nbins)
            if inpt.ndim == 1:
                return inpt
            else:
                raise ValueError(
                    "Wrong dimension ({}) of {} array.".format(
                        inpt.ndim, what
                    )
                )
        elif what.lower() in ["corr", "cov"]:
            if inpt.ndim == 2:
                return inpt
            else:
                raise ValueError(
                    "Wrong dimension ({}) of {} array.".format(inpt.ndim, what)
                )
        else:
            raise ValueError("Unknown value what='{}'.".format(what))

    # **************************************************************************
    # C: Doing the actual calculations
    # **************************************************************************

    # Note: Overrides inherited method from data.
[docs]    def data(self, decorrelate=False, **kwargs):
        """ Return data matrix

        Args:
            decorrelate: Unrotate the correlation matrix to return uncorrelated
                data entries
            **kwargs: Any keyword argument to ``Data.data()``

        Returns:
            self.n x self.nbins array
        """
        ret = super().data(**kwargs)

        if decorrelate:
            inverses = np.linalg.inv(self.corr())
            ret = np.einsum("kij,kj->ki", inverses, ret)

        return ret

[docs]    def cov(self, relative=False):
        """ Return covariance matrix

        Args:
            relative: "Relative to data", i.e.
                :math:`\\mathrm{Cov}_{ij} / (\\mathrm{data}_i \cdot \\mathrm{data}_j)`

        Returns:
            self.n x self.nbins x self.nbins array
        """

        data = self.data()
        cov = np.tile(self.abs_cov, (self.n, 1, 1))
        cov += np.einsum("ij,ki,kj->kij", self.rel_cov, data, data)
        if self.poisson_errors:
            cov += corr2cov(
                np.tile(np.eye(self.nbins), (self.n, 1, 1)),
                np.sqrt(self.poisson_errors) * np.sqrt(data)
            )

        if not relative:
            return cov
        else:
            return abs2rel_cov(cov, data)

[docs]    def corr(self):
        """ Return correlation matrix

        Returns:
            self.n x self.nbins x self.nbins array
        """
        return cov2corr(self.cov())

[docs]    def err(self, relative=False):
        """ Return errors per bin

        Args:
            relative: Relative errors

        Returns:
            self.n x self.nbins array
        """
        if not relative:
            return cov2err(self.cov())
        else:
            return cov2err(self.cov()) / self.data()

    # **************************************************************************
    # D: Configuration
    # **************************************************************************

    # -------------------------------------------------------------------------
    # Add absolute errors
    # -------------------------------------------------------------------------

[docs]    def add_err_cov(self, cov) -> None:
        """ Add error from covariance matrix.

        Args:
            cov: self.n x self.nbins x self.nbins array of covariance matrices
                or self.nbins x self.nbins covariance matrix (if equal for
                all data points)
        """
        cov = self._interpret_input(cov, "cov")
        self.abs_cov += cov

[docs]    def add_err_corr(self, err, corr) -> None:
        """ Add error from errors vector and correlation matrix.

        Args:
            err: self.n x self.nbins vector of errors for each data point and
                bin or self.nbins vector of uniform errors per data point or
                float (uniform error per bin and datapoint)
            corr: self.n x self.nbins x self.nbins correlation matrices
                or self.nbins x self.nbins correlation matrix
        """
        err = self._interpret_input(err, "err")
        corr = self._interpret_input(corr, "corr")
        self.add_err_cov(corr2cov(corr, err))

[docs]    def add_err_uncorr(self, err) -> None:
        """
        Add uncorrelated error.

        Args:
            err: see argument of
                :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr`
        """
        err = self._interpret_input(err, "err")
        corr = np.identity(self.nbins)
        self.add_err_corr(err, corr)

[docs]    def add_err_maxcorr(self, err) -> None:
        """
        Add maximally correlated error.

        Args:
            err: see argument of
                :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr`
        """
        err = self._interpret_input(err, "err")
        corr = np.ones((self.nbins, self.nbins))
        self.add_err_corr(err, corr)

    # -------------------------------------------------------------------------
    # Add relative errors
    # -------------------------------------------------------------------------

[docs]    def add_rel_err_cov(self, cov: np.array) -> None:
        """
        Add error from "relative" covariance matrix

        Args:
            cov: see argument of
                :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_cov`
        """
        cov = self._interpret_input(cov, "cov")
        self.rel_cov += cov

[docs]    def add_rel_err_corr(self, err, corr) -> None:
        """
        Add error from relative errors and correlation matrix.

        Args:
            err: see argument of
                :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr`
            corr: see argument of
                :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr`
        """
        err = self._interpret_input(err, "err")
        corr = self._interpret_input(corr, "corr")
        self.add_rel_err_cov(corr2cov(corr, err))

[docs]    def add_rel_err_uncorr(self, err: np.array) -> None:
        """
        Add uncorrelated relative error.

        Args:
            err: see argument of
                :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr`
        """
        err = self._interpret_input(err, "err")
        corr = np.identity(self.nbins)
        self.add_rel_err_corr(err, corr)

[docs]    def add_rel_err_maxcorr(self, err: np.array) -> None:
        """
        Add maximally correlated relative error.

        Args:
            err: see argument of
                :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr`
        """
        err = self._interpret_input(err, "err")
        corr = np.ones((self.nbins, self.nbins))
        self.add_rel_err_corr(err, corr)

    # -------------------------------------------------------------------------
    # Other forms of errors
    # -------------------------------------------------------------------------

[docs]    def add_err_poisson(self, scale=1):
        """
        Add poisson errors/statistical errors.

        Args:
            scale: Scale poisson errors by this float (for example by your data
                normalization if your data itself is not normalized).

        Returns:
            None
        """
        self.poisson_errors = scale