Source code for clmm.clusterensemble

"""@file clusterensemble.py
The Cluster Ensemble class
"""
import pickle
import numpy as np
import healpy

from .gcdata import GCData
from .dataops import make_stacked_radial_profile


[docs] class ClusterEnsemble: """Object that contains a list of GalaxyCluster objects Attributes ---------- unique_id : int or string Unique identifier of the galaxy cluster ensemble data : GCData Table with galaxy clusters data (i. e. ids, profiles, redshifts). id_dict: dict Dictionary of indices given the cluster id stacked_data : GCData, None Stacked cluster profiles cov : dict Dictionary with the covariances: * "tan_sc" : tangential component computed with sample covariance * "cross_sc" : cross component computed with sample covariance * "tan_jk" : tangential component computed with bootstrap * "cross_bs" : cross component computed with bootstrap * "tan_jk" : tangential component computed with jackknife * "cross_jk" : cross component computed with jackknife """ def __init__(self, unique_id, gc_list=None, **kwargs): """Initializes a ClusterEnsemble object Parameters ---------- unique_id : int or string Unique identifier of the galaxy cluster ensemble """ if isinstance(unique_id, (int, str)): unique_id = str(unique_id) else: raise TypeError(f"unique_id incorrect type: {type(unique_id)}") self.unique_id = unique_id self.data = GCData(meta={"bin_units": None}) if gc_list is not None: self._add_values(gc_list, **kwargs) self.stacked_data = None self.cov = { "tan_sc": None, "cross_sc": None, "tan_bs": None, "cross_bs": None, "tan_jk": None, "cross_jk": None, } def _add_values(self, gc_list, **kwargs): """Add values for all attributes Parameters ---------- gc_list : list, tuple List of GalaxyCluster objects. gc_cols : list, tuple List of GalaxyCluster objects. """ for cluster in gc_list: self.make_individual_radial_profile(cluster, **kwargs) def __getitem__(self, item): """Returns self.data[item]""" return self.data[item] def __len__(self): """Returns length of ClusterEnsemble""" return len(self.data)
[docs] def make_individual_radial_profile( self, galaxycluster, bin_units, bins=10, error_model="ste", cosmo=None, tan_component_in="et", cross_component_in="ex", tan_component_out="gt", cross_component_out="gx", tan_component_in_err=None, cross_component_in_err=None, use_weights=True, weights_in="w_ls", weights_out="W_l", ): """Compute the individual shear profile from a single GalaxyCluster object and adds the averaged data in the data attribute. Parameters ---------- galaxycluster : GalaxyCluster GalaxyCluster object with cluster metadata and background galaxy data bin_units : str Units to use for the radial bins of the shear profile Allowed Options = ["radians", "deg", "arcmin", "arcsec", "kpc", "Mpc"] (letter case independent) bins : array_like or int, optional User defined bins to use for the shear profile. If a list is provided, use that as the bin edges. If a integer is provided, create that many equally spaced bins between the minimum and maximum angular separations in bin_units. If nothing is provided, defaults to 10 equally spaced bins. error_model : str, optional Statistical error model to use for y uncertainties. (letter case independent) * `ste` - Standard error [=std/sqrt(n) in unweighted computation] (Default). * `std` - Standard deviation. cosmo: clmm.Comology, optional CLMM Cosmology object tan_component_in: string, optional Name of the tangential component column in `galcat` to be binned. Default: 'et' cross_component_in: string, optional Name of the cross component column in `galcat` to be binned. Default: 'ex' tan_component_out: string, optional Name of the tangetial component binned column to be added in profile table. Default: 'gt' cross_component_out: string, optional Name of the cross component binned profile column to be added in profile table. Default: 'gx' tan_component_in_err: string, None, optional Name of the tangential component error column in `galcat` to be binned. Default: None cross_component_in_err: string, None, optional Name of the cross component error column in `galcat` to be binned. Default: None weights_in : str, None Name of the weight column in `galcat` to be considered in binning. weights_out : str, None Name of the weight column to be used in the added to the profile table. """ # pylint: disable=unused-argument tb_kwargs = {} tb_kwargs.update(locals()) tb_kwargs.pop("self") tb_kwargs.pop("tb_kwargs") tb_kwargs.pop("galaxycluster") cl_bin_units = galaxycluster.galcat.meta.get("bin_units", None) self.data.update_info_ext_valid("bin_units", self.data, cl_bin_units, overwrite=False) cl_cosmo = galaxycluster.galcat.meta.get("cosmo", None) self.data.update_info_ext_valid("cosmo", self.data, cl_cosmo, overwrite=False) profile_table = galaxycluster.make_radial_profile( include_empty_bins=True, gal_ids_in_bins=False, add=False, **tb_kwargs ) self.add_individual_radial_profile( galaxycluster, profile_table, tan_component_out, cross_component_out, weights_out, )
[docs] def add_individual_radial_profile( self, galaxycluster, profile_table, tan_component="gt", cross_component="gx", weights="W_l", ): """Compute the individual shear profile from a single GalaxyCluster object and adds the averaged data in the data attribute. Parameters ---------- galaxycluster : GalaxyCluster GalaxyCluster object with cluster metadata and background galaxy data profile_table : GCData Table containing the radius grid points, the tangential, cross shear and weights profiles on that grid. tan_component: string, optional Name of the tangetial component binned column in the profile table. Default: 'gt' cross_component: string, optional Name of the cross component binned profile column in the profile table. Default: 'gx' weights : str, None Name of the weight binned column in the profile table. """ cl_bin_units = profile_table.meta.get("bin_units", None) self.data.update_info_ext_valid("bin_units", self.data, cl_bin_units, overwrite=False) cl_cosmo = profile_table.meta.get("cosmo", None) self.data.update_info_ext_valid("cosmo", self.data, cl_cosmo, overwrite=False) tbcols = ("radius", tan_component, cross_component, weights) data_to_save = [ galaxycluster.unique_id, galaxycluster.ra, galaxycluster.dec, galaxycluster.z, *(np.array(profile_table[col]) for col in tbcols), ] if len(self.data) == 0: for col, data in zip(["cluster_id", "ra", "dec", "z", *tbcols], data_to_save): self.data[col] = [data] else: self.data.add_row(data_to_save)
def _check_empty_data(self): if len(self.data) == 0: raise ValueError( "There is no single cluster profile data. Please run" + "'make_individual_radial_profile' or " "'add_individual_radial_profile' " "for each cluster in your catalog" )
[docs] def make_stacked_radial_profile(self, tan_component="gt", cross_component="gx", weights="W_l"): """Computes stacked profile and mean separation distances and add it internally to `stacked_data`. Parameters ---------- tan_component : string, optional Name of the tangential component column in `data`. Default: 'gt' cross_component : string, optional Name of the cross component column in `data`. Default: 'gx' weights : str Name of the weights column in `data`. """ self._check_empty_data() radius, components = make_stacked_radial_profile( self.data["radius"], self.data[weights], [self.data[tan_component], self.data[cross_component]], ) self.stacked_data = GCData( [radius, *components], meta=self.data.meta, names=("radius", tan_component, cross_component), )
[docs] def compute_sample_covariance(self, tan_component="gt", cross_component="gx"): """Compute Sample covariance matrix for cross and tangential and cross stacked profiles and updates .cov dict (`tan_sc`, `cross_sc`). Parameters ---------- tan_component : string, optional Name of the tangential component column in `data`. Default: 'gt' cross_component : string, optional Name of the cross component column in `data`. Default: 'gx' """ self._check_empty_data() n_catalogs = len(self.data) self.cov["tan_sc"] = np.cov(self.data[tan_component].T, bias=False) / n_catalogs self.cov["cross_sc"] = np.cov(self.data[cross_component].T, bias=False) / n_catalogs
[docs] def compute_bootstrap_covariance( self, tan_component="gt", cross_component="gx", n_bootstrap=10 ): """Compute the bootstrap covariance matrix, add boostrap covariance matrix for tangential and cross stacked profiles and updates .cov dict (`tan_jk`, `cross_bs`). Parameters ---------- tan_component : string, optional Name of the tangential component column in `data`. Default: 'gt' cross_component : string, optional Name of the cross component column in `data`. Default: 'gx' n_bootstrap : int number of bootstrap resamplings """ self._check_empty_data() n_catalogs = len(self) cluster_index = np.arange(n_catalogs) cluster_index_bootstrap = [ np.random.choice(cluster_index, n_catalogs) for n_boot in range(n_bootstrap) ] gt_boot, gx_boot = make_stacked_radial_profile( self["radius"][None, cluster_index_bootstrap][0].transpose(1, 2, 0), self["W_l"][None, cluster_index_bootstrap][0].transpose(1, 2, 0), [ self[tan_component][None, cluster_index_bootstrap][0].transpose(1, 2, 0), self[cross_component][None, cluster_index_bootstrap][0].transpose(1, 2, 0), ], )[1] coeff = (n_catalogs / (n_catalogs - 1)) ** 2 self.cov["tan_bs"] = coeff * np.cov(np.array(gt_boot), bias=False, ddof=0) self.cov["cross_bs"] = coeff * np.cov(np.array(gx_boot), bias=False)
[docs] def compute_jackknife_covariance(self, tan_component="gt", cross_component="gx", n_side=16): """Compute the jackknife covariance matrix, add boostrap covariance matrix for tangential and cross stacked profiles and updates .cov dict (`tan_jk`, `cross_jk`). Uses healpix sky area sub-division : https://healpix.sourceforge.io Parameters ---------- tan_component : string, optional Name of the tangential component column in `data`. Default: 'gt' cross_component : string, optional Name of the cross component column in `data`. Default: 'gx' n_side : int healpix sky area division parameter (number of sky area : 12*n_side^2) """ # may induce artificial noise if there are some healpix pixels # not covering entirely the 2D map of clusters self._check_empty_data() pixels = healpy.ang2pix(n_side, self.data["ra"], self.data["dec"], nest=True, lonlat=True) pixels_list_unique = np.unique(pixels) gt_jack, gx_jack = [], [] for hp_list_delete in pixels_list_unique: mask = ~np.isin(pixels, hp_list_delete) gt, gx = make_stacked_radial_profile( self["radius"][mask], self["W_l"][mask], [self[tan_component][mask], self[cross_component][mask]], )[1] gt_jack.append(gt) gx_jack.append(gx) n_jack = pixels_list_unique.size coeff = (n_jack - 1) ** 2 / (n_jack) self.cov["tan_jk"] = coeff * np.cov(np.transpose(gt_jack), bias=False, ddof=0) self.cov["cross_jk"] = coeff * np.cov(np.transpose(gx_jack), bias=False, ddof=0)
[docs] def save(self, filename, **kwargs): """Saves GalaxyCluster object to filename using Pickle""" with open(filename, "wb") as fin: pickle.dump(self, fin, **kwargs)
[docs] @classmethod def load(cls, filename, **kwargs): """Loads GalaxyCluster object to filename using Pickle""" with open(filename, "rb") as fin: self = pickle.load(fin, **kwargs) return self