Source code for clusterking.data.dwe

#!/usr/bin/env python3

# 3rd
import numpy as np

# ours
from clusterking.data.data import Data
from clusterking.maths.statistics import cov2err, cov2corr, abs2rel_cov, \
     ensure_array, corr2cov


[docs]class DataWithErrors(Data): """ This class extends the ``Data`` class by convenient and performant ways to add errors to the distributions. See the description of the ``Data`` class for more information about the data structure itself. There are three basic ways to add errors: 1. Add relative errors (with correlation) relative to the bin content of each bin in the distribution: ``add_rel_err_...`` 2. Add absolute errors (with correlation): ``add_err_...`` 3. Add poisson errors: ``add_err_poisson`` .. note:: All of these methods, add the errors in a consistent way for all sample points/distributions, i.e. it is impossible to add a certain error specifically to one sample point only! Afterwards, you can get errors, correlation and covariance matrices for every data point by using one of the methods such as ``cov``, ``corr``, ``err``. .. note:: When saving your dataset, your error configuration is saved as well, so you can reload it like any other ``Data`` object. Args: data: n x nbins matrix """
[docs] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Initialize some values to their default self.rel_cov = None self.abs_cov = None self.poisson_errors = 0
# ************************************************************************** # A: Additional shortcuts # ************************************************************************** @property def rel_cov(self): value = self.md["errors"]["rel_cov"] if value is None: if self.nbins: return np.zeros((self.nbins, self.nbins)) else: return None return np.array(value) @rel_cov.setter def rel_cov(self, value): if isinstance(value, np.ndarray): # convert to list in order to serialize value = value.tolist() self.md["errors"]["rel_cov"] = value @property def abs_cov(self): value = self.md["errors"]["abs_cov"] if value is None: if self.nbins: return np.zeros((self.nbins, self.nbins)) else: return None return np.array(value) @abs_cov.setter def abs_cov(self, value): if isinstance(value, np.ndarray): # convert to list in order to serialize value = value.tolist() self.md["errors"]["abs_cov"] = value @property def poisson_errors(self): return self.md["errors"]["poisson"] @poisson_errors.setter def poisson_errors(self, value): self.md["errors"]["poisson"] = value # ************************************************************************** # B: Helper functions # ************************************************************************** def _interpret_input(self, inpt, what: str) -> np.ndarray: """ Interpret user input Args: inpt: User input what: 'err', 'corr', 'cov' Returns: correctly shaped numpy array """ inpt = ensure_array(inpt) if what.lower() in ["err"]: if inpt.ndim == 0: return np.tile(inpt, self.nbins) if inpt.ndim == 1: return inpt else: raise ValueError( "Wrong dimension ({}) of {} array.".format( inpt.ndim, what ) ) elif what.lower() in ["corr", "cov"]: if inpt.ndim == 2: return inpt else: raise ValueError( "Wrong dimension ({}) of {} array.".format(inpt.ndim, what) ) else: raise ValueError("Unknown value what='{}'.".format(what)) # ************************************************************************** # C: Doing the actual calculations # ************************************************************************** # Note: Overrides inherited method from data.
[docs] def data(self, decorrelate=False, **kwargs): """ Return data matrix Args: decorrelate: Unrotate the correlation matrix to return uncorrelated data entries **kwargs: Any keyword argument to ``Data.data()`` Returns: self.n x self.nbins array """ ret = super().data(**kwargs) if decorrelate: inverses = np.linalg.inv(self.corr()) ret = np.einsum("kij,kj->ki", inverses, ret) return ret
[docs] def cov(self, relative=False): """ Return covariance matrix Args: relative: "Relative to data", i.e. :math:`\\mathrm{Cov}_{ij} / (\\mathrm{data}_i \cdot \\mathrm{data}_j)` Returns: self.n x self.nbins x self.nbins array """ data = self.data() cov = np.tile(self.abs_cov, (self.n, 1, 1)) cov += np.einsum("ij,ki,kj->kij", self.rel_cov, data, data) if self.poisson_errors: cov += corr2cov( np.tile(np.eye(self.nbins), (self.n, 1, 1)), np.sqrt(self.poisson_errors) * np.sqrt(data) ) if not relative: return cov else: return abs2rel_cov(cov, data)
[docs] def corr(self): """ Return correlation matrix Returns: self.n x self.nbins x self.nbins array """ return cov2corr(self.cov())
[docs] def err(self, relative=False): """ Return errors per bin Args: relative: Relative errors Returns: self.n x self.nbins array """ if not relative: return cov2err(self.cov()) else: return cov2err(self.cov()) / self.data()
# ************************************************************************** # D: Configuration # ************************************************************************** # ------------------------------------------------------------------------- # Add absolute errors # -------------------------------------------------------------------------
[docs] def add_err_cov(self, cov) -> None: """ Add error from covariance matrix. Args: cov: self.n x self.nbins x self.nbins array of covariance matrices or self.nbins x self.nbins covariance matrix (if equal for all data points) """ cov = self._interpret_input(cov, "cov") self.abs_cov += cov
[docs] def add_err_corr(self, err, corr) -> None: """ Add error from errors vector and correlation matrix. Args: err: self.n x self.nbins vector of errors for each data point and bin or self.nbins vector of uniform errors per data point or float (uniform error per bin and datapoint) corr: self.n x self.nbins x self.nbins correlation matrices or self.nbins x self.nbins correlation matrix """ err = self._interpret_input(err, "err") corr = self._interpret_input(corr, "corr") self.add_err_cov(corr2cov(corr, err))
[docs] def add_err_uncorr(self, err) -> None: """ Add uncorrelated error. Args: err: see argument of :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr` """ err = self._interpret_input(err, "err") corr = np.identity(self.nbins) self.add_err_corr(err, corr)
[docs] def add_err_maxcorr(self, err) -> None: """ Add maximally correlated error. Args: err: see argument of :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr` """ err = self._interpret_input(err, "err") corr = np.ones((self.nbins, self.nbins)) self.add_err_corr(err, corr)
# ------------------------------------------------------------------------- # Add relative errors # -------------------------------------------------------------------------
[docs] def add_rel_err_cov(self, cov: np.array) -> None: """ Add error from "relative" covariance matrix Args: cov: see argument of :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_cov` """ cov = self._interpret_input(cov, "cov") self.rel_cov += cov
[docs] def add_rel_err_corr(self, err, corr) -> None: """ Add error from relative errors and correlation matrix. Args: err: see argument of :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr` corr: see argument of :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr` """ err = self._interpret_input(err, "err") corr = self._interpret_input(corr, "corr") self.add_rel_err_cov(corr2cov(corr, err))
[docs] def add_rel_err_uncorr(self, err: np.array) -> None: """ Add uncorrelated relative error. Args: err: see argument of :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr` """ err = self._interpret_input(err, "err") corr = np.identity(self.nbins) self.add_rel_err_corr(err, corr)
[docs] def add_rel_err_maxcorr(self, err: np.array) -> None: """ Add maximally correlated relative error. Args: err: see argument of :py:meth:`~clusterking.data.dwe.DataWithErrors.add_err_corr` """ err = self._interpret_input(err, "err") corr = np.ones((self.nbins, self.nbins)) self.add_rel_err_corr(err, corr)
# ------------------------------------------------------------------------- # Other forms of errors # -------------------------------------------------------------------------
[docs] def add_err_poisson(self, scale=1): """ Add poisson errors/statistical errors. Args: scale: Scale poisson errors by this float (for example by your data normalization if your data itself is not normalized). Returns: None """ self.poisson_errors = scale