Source code for snat_sim.utils.cov_utils

"""The ``cov_utils`` module extends ``pandas.DataFrame`` objects to include
covariance calculations relevant to processing supernova fit results.

.. note:: This module is heavily based on code ported from the
   `AnalyzeSN <https://github.com/rbiswas4/AnalyzeSN>`_ project.


Usage Example
-------------

The ``cov_utils`` accessor provides a constructor method for converting
covariance data from ``numpy`` type objects into a ``pandas.DataFrame``
instance:

.. doctest::

   >>> import numpy as np
   >>> import pandas as pd
   >>> import snat_sim

   >>> parameter_names = ['z', 't0', 'x0', 'x1', 'c']
   >>> example_covariance = np.array([
   ...     [ 2.19e-04,  9.70e-04,  1.46e-09, -9.15e-04, -3.38e-04],
   ...     [ 9.70e-04,  1.75e-01, -2.75e-08,  4.74e-02, -6.88e-04],
   ...     [ 1.46e-09, -2.75e-08,  1.53e-13, -7.58e-08, -9.63e-09],
   ...     [-9.15e-04,  4.74e-02, -7.58e-08,  1.04e-01,  2.85e-03],
   ...     [-3.38e-04, -6.88e-04, -9.63e-09,  2.85e-03,  1.33e-03]
   ... ])
   >>> covariance_df = pd.DataFrame.cov_utils.from_array(example_covariance, parameter_names)
   >>> covariance_df
                  z            t0            x0            x1             c
   z   2.190000e-04  9.700000e-04  1.460000e-09 -9.150000e-04 -3.380000e-04
   t0  9.700000e-04  1.750000e-01 -2.750000e-08  4.740000e-02 -6.880000e-04
   x0  1.460000e-09 -2.750000e-08  1.530000e-13 -7.580000e-08 -9.630000e-09
   x1 -9.150000e-04  4.740000e-02 -7.580000e-08  1.040000e-01  2.850000e-03
   c  -3.380000e-04 -6.880000e-04 -9.630000e-09  2.850000e-03  1.330000e-03

From a given dataframe, you can easily extract a subset of the covariance data
using the ``subcovariance`` method:

.. doctest::

   >>> covariance_df.cov_utils.subcovariance(['x1', 'c'])
            x1        c
   x1  0.10400  0.00285
   c   0.00285  0.00133

Module Docs
-----------
"""

from typing import Collection, List, cast, overload
from warnings import warn

import numpy as np
import pandas as pd


[docs]@pd.api.extensions.register_dataframe_accessor('cov_utils')
class CovarianceAccessor:
    """Pandas DataFrame accessor for covariance calculations"""

[docs]    def __init__(self, pandas_obj: pd.DataFrame) -> None:
        """Extends ``pandas`` support for time series data

        DO NOT USE THIS CLASS DIRECTLY! This class is registered as a pandas accessor.
        See the module level usage example for more information.

        Args:
            pandas_obj: Dataframe representing the covariance matrix
        """

        self._obj = pandas_obj.copy()

[docs]    @classmethod
    def from_array(
            cls, cov_array: np.ndarray, param_names: Collection[str] = None, normalized: bool = False
    ) -> pd.DataFrame:
        """Instantiates a Dataframe covariance matrix using data from a `numpy.ndarray` object.

        If paramNames is not None, then the dataframe is indexed by the
        parameter names, and has columns corresponding to the parameter names
        enabling easy access by index or names.

        Args:
            cov_array: Array of the covariance
            param_names: Collection of strings
            normalized: Whether to return the normalized covariance matrix

        Returns:
            A `pandas.DataFrame` with column names and indexes given by the parameter
            names. If paramNames is None, the return is a DataFrame with indexes and
            column names chosen by pandas.
        """

        length, width = np.shape(cov_array)
        if length != width:  # Check for the covariance matrix being square, not checking for symmetry
            raise ValueError('The covariance matrix is not square; length!=width')

        if param_names is not None:
            if len(param_names) != width:
                raise ValueError('The number of parameters must match the length of the covariance matrix')

            cov = pd.DataFrame(cov_array, columns=param_names, index=param_names)

        else:
            cov = pd.DataFrame(cov_array)

        if not normalized:
            return cov

        # normalize if requested
        stds = cov.values.diagonal()
        for i, col in enumerate(cov.columns):
            cov[col] = cov[col] / stds[i]

        for i in range(len(cov)):
            cov.iloc[i] = cov.iloc[i] / stds[i]

        return cov

    # noinspection PyMissingOrEmptyDocstring
    @overload
    def log_covariance(self, param_name: int, param_value: float, factor: float = 1.) -> np.ndarray:
        ...  # pragma: no cover

    # noinspection PyMissingOrEmptyDocstring
    @overload
    def log_covariance(self, param_name: str, param_value: float, factor: float = 1.) -> pd.DataFrame:
        ...  # pragma: no cover

[docs]    def log_covariance(self, param_name, param_value, factor=1.):
        """
        Covariance of the parameters with parameter paramName replaced by
        factor * np.log(param) everywhere, and its true value is paramValue,
        assuming linear propagation

        The ``factor`` parameter is used to scale the logarithm. For example,
        if the relevant transformation is going from ``f`` to ``-2.5 * log10(f)``,
        the factor should be ``-2.5 / np.log(10)``

        Args:
            param_name: Parameter name or integer index specifying the position of the desired variable
            param_value: True or estimated value of the variable itself
            factor: Factor multiplying the natural logarithm
        """

        covariance_df = self._obj
        if isinstance(param_name, int):
            cov = covariance_df.values
            cov[:, param_name] = factor * cov[:, param_name] / param_value
            cov[param_name, :] = factor * cov[param_name, :] / param_value
            warn('Parameter name specified as index. Returning covariance as numpy array.')
            return cov

        covariance_df[param_name] = factor * covariance_df[param_name] / param_value
        covariance_df.loc[param_name] = factor * covariance_df.loc[param_name] / param_value

        return covariance_df

[docs]    def subcovariance(self, param_list: List[str]) -> pd.DataFrame:
        """Returns the covariance of a subset of parameters in a covariance dataFrame.

        The set of parameters in paramList must be a subset of the columns and
        indices of covariance.

        Args:
            param_list: List of parameters for which the subCovariance matrix is desired

        Returns:
            The covariance of the given parameters
        """

        return self._obj.loc[param_list, param_list]

    # noinspection PyPep8Naming
[docs]    def expAVsquare(self, A: np.array) -> float:
        """The expectation of ``(A^T * V) ** 2`` where A is a constant vector and V is
        a random vector ``V ~ N(0., covV)`` by computing ``A^T * covV * A``

        Args:
            A: Vector of constants.

        Returns:
            The variance as a scalar value
        """

        va = np.sum(self._obj * A, axis=1)
        var = np.sum(A * va, axis=0)
        return cast(float, var)
Table Of Contents

Source code for snat_sim.utils.cov_utils