Generate mock data for a cluster ensemble
=========================================

Generate cluster ensemble with random masses and redshifts
----------------------------------------------------------

.. code:: ipython3

    %load_ext autoreload
    %autoreload 2

    import sys
    import os

    import numpy as np
    from astropy.table import Table
    from numpy import random
    import scipy

    import matplotlib.pyplot as plt

    import clmm
    from clmm import GalaxyCluster, ClusterEnsemble, GCData
    from clmm import Cosmology
    from clmm.support import mock_data as mock

    clmm.__version__

.. parsed-literal::

    '1.12.0'

.. code:: ipython3

    np.random.seed(11)

.. code:: ipython3

    cosmo = Cosmology(H0=71.0, Omega_dm0=0.265 - 0.0448, Omega_b0=0.0448, Omega_k0=0.0)

Generating a cluster catalog and associated source catalogs
-----------------------------------------------------------

Below, we randomly generate the masses, redshifts, concentrations and
coordinates of an ensemble of ``n_clusters`` clusters. For simplicity,
the drawing is performed uniformly in log(mass) and redshift (instead of
following a halo mass function).

.. code:: ipython3

    # redshift and mass range of the galaxy clusters
    z_bin = [0.2, 0.25]
    logm_bin = np.array([14, 14.1])  # log10(M), M in solar masses

    # number of clusters in the ensemble
    n_clusters = 30

    # random draw in the mass and redshift range (for simplicity, uniform instead of following an actual mass function)
    cluster_m = 10 ** (
        (logm_bin[1] - logm_bin[0]) * np.random.random(n_clusters) + logm_bin[0]
    )  # in M_sun
    cluster_z = (z_bin[1] - z_bin[0]) * np.random.random(n_clusters) + z_bin[0]

    # random normal draw of cluster concentration, around c_mean
    c_mean = 4.0
    lnc = abs(np.log(c_mean) + 0.01 * np.random.randn(n_clusters))
    concentration = np.exp(lnc)

    # randomly draw cluster positions on the sky
    ra = np.random.random(n_clusters) * 360  # from 0 to 360 deg
    sindec = np.random.random(n_clusters) * 2 - 1
    dec = np.arcsin(sindec) * 180 / np.pi  # from -90 to 90 deg

Background galaxy catalog generation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For each cluster of the ensemble, we use ``mock_data`` to generate a
background galaxy catalog and store the results in a ``GalaxyCluster``
object. Note that:

- The cluster density profiles follow the NFW parametrisation;
- The source redshifts follow the Chang et al. (2013) distribution and
  have associated pdfs;
- The shapes include shape noise and shape measurement errors;
- Background galaxy catalogs are independent, even if the clusters are
  close (i.e., no galaxy is shared between two catalogs).

For each cluster we then compute

- the tangential and cross :math:`\Delta\Sigma` for each background
  galaxy;
- the weights ``w_ls`` to be used to compute the corresponding radial
  profiles (see the ``demo_compute_deltasigma_weights.ipynb`` notebook
  for details).

The cluster objects are then stored in ``gclist``.

.. code:: ipython3

    import warnings

    warnings.simplefilter(
        "ignore"
    )  # just to prevent warning print out when looping over the cluster ensemble below
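As a quick sanity check (a minimal sketch, not part of the original
notebook), one can verify that the random draws above indeed fall inside
the chosen mass and redshift bins:

.. code:: ipython3

    # all redshifts should lie in z_bin and all log-masses in logm_bin
    assert np.all((cluster_z >= z_bin[0]) & (cluster_z <= z_bin[1]))
    assert np.all((np.log10(cluster_m) >= logm_bin[0]) & (np.log10(cluster_m) <= logm_bin[1]))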
.. code:: ipython3

    gclist = []

    # number of galaxies in each cluster field (alternatively, can use the galaxy density instead)
    n_gals = 10000
    # ngal_density = 10

    for i in range(n_clusters):
        # generate background galaxy catalog for cluster i
        noisy_data_z = mock.generate_galaxy_catalog(
            cluster_m[i],
            cluster_z[i],
            concentration[i],
            cosmo,
            cluster_ra=ra[i],
            cluster_dec=dec[i],
            zsrc="chang13",
            delta_so=200,
            massdef="critical",
            halo_profile_model="nfw",
            zsrc_min=cluster_z[i] + 0.1,
            zsrc_max=3.0,
            field_size=10.0,
            shapenoise=0.04,
            photoz_sigma_unscaled=0.02,
            ngals=n_gals,
            mean_e_err=0.1,
        )
        cl = clmm.GalaxyCluster("mock_cluster", ra[i], dec[i], cluster_z[i], noisy_data_z)

        # compute DeltaSigma for each background galaxy
        cl.compute_tangential_and_cross_components(
            shape_component1="e1",
            shape_component2="e2",
            tan_component="DS_t",
            cross_component="DS_x",
            cosmo=cosmo,
            is_deltasigma=True,
            use_pdz=True,
        )

        # compute the weights to be used to build the DeltaSigma radial profiles
        cl.compute_galaxy_weights(
            use_pdz=True,
            use_shape_noise=True,
            shape_component1="e1",
            shape_component2="e2",
            use_shape_error=True,
            shape_component1_err="e_err",
            shape_component2_err="e_err",
            weight_name="w_ls",
            cosmo=cosmo,
            is_deltasigma=True,
            add=True,
        )

        # append the cluster to the list
        gclist.append(cl)

.. code:: ipython3

    cl.galcat.columns

Creating ClusterEnsemble object and estimation of individual excess surface density profiles
--------------------------------------------------------------------------------------------

From the galaxy cluster object list ``gclist``, we instantiate a cluster
ensemble object ``clusterensemble``. This instantiation step uses

- the individual galaxy input :math:`\Delta\Sigma_+` and
  :math:`\Delta\Sigma_{\times}` values (computed in the previous step,
  ``DS_{t,x}``)
- the corresponding individual input weights ``w_ls`` (computed in the
  previous step)

to compute

- the output tangential ``DS_t`` and cross signal ``DS_x`` binned
  profiles (where the binning is controlled by ``bins``)
- the associated profile weights ``W_l`` (that will be used to compute
  the stacked profile)

.. code:: ipython3

    ensemble_id = 1
    names = ["id", "ra", "dec", "z", "radius", "gt", "gx", "W_l"]
    bins = np.logspace(np.log10(0.3), np.log10(5), 10)
    clusterensemble = ClusterEnsemble(
        ensemble_id,
        gclist,
        tan_component_in="DS_t",
        cross_component_in="DS_x",
        tan_component_out="DS_t",
        cross_component_out="DS_x",
        weights_in="w_ls",
        weights_out="W_l",
        bins=bins,
        bin_units="Mpc",
        cosmo=cosmo,
    )

There is also the option to create a ``ClusterEnsemble`` object without
the clusters list. The user may then compute the individual profile of
each cluster of interest and build the stacked radial profile once all
the individual profiles have been computed. This approach is recommended
when there is a large number of clusters, to avoid excessive memory
allocation: each cluster can be generated separately, its individual
profile computed and added to the ensemble, and the cluster object
deleted afterwards (a sketch of this pattern follows the code below).

.. code:: ipython3

    ensemble_id2 = 2
    empty_cluster_ensemble = ClusterEnsemble(ensemble_id2)
    for cluster in gclist:
        empty_cluster_ensemble.make_individual_radial_profile(
            galaxycluster=cluster,
            tan_component_in="DS_t",
            cross_component_in="DS_x",
            tan_component_out="DS_t",
            cross_component_out="DS_x",
            weights_in="w_ls",
            weights_out="W_l",
            bins=bins,
            bin_units="Mpc",
            cosmo=cosmo,
        )
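For illustration, here is a minimal sketch of that memory-light pattern.
It reuses the mock-generation calls of the first loop; the
``make_mock_cluster`` helper and the ensemble id ``99`` are hypothetical
names introduced only for this sketch:

.. code:: ipython3

    # sketch: build the ensemble one cluster at a time, never holding all catalogs in memory
    def make_mock_cluster(i):
        """Hypothetical helper: generate the catalog of cluster i and return a
        GalaxyCluster with DS_t, DS_x and w_ls computed (same calls as above)."""
        data_i = mock.generate_galaxy_catalog(
            cluster_m[i], cluster_z[i], concentration[i], cosmo,
            cluster_ra=ra[i], cluster_dec=dec[i],
            zsrc="chang13", delta_so=200, massdef="critical",
            halo_profile_model="nfw", zsrc_min=cluster_z[i] + 0.1,
            zsrc_max=3.0, field_size=10.0, shapenoise=0.04,
            photoz_sigma_unscaled=0.02, ngals=n_gals, mean_e_err=0.1,
        )
        cl_i = clmm.GalaxyCluster("mock_cluster", ra[i], dec[i], cluster_z[i], data_i)
        cl_i.compute_tangential_and_cross_components(
            shape_component1="e1", shape_component2="e2",
            tan_component="DS_t", cross_component="DS_x",
            cosmo=cosmo, is_deltasigma=True, use_pdz=True,
        )
        cl_i.compute_galaxy_weights(
            use_pdz=True, use_shape_noise=True,
            shape_component1="e1", shape_component2="e2",
            use_shape_error=True, shape_component1_err="e_err",
            shape_component2_err="e_err", weight_name="w_ls",
            cosmo=cosmo, is_deltasigma=True, add=True,
        )
        return cl_i

    lowmem_ensemble = ClusterEnsemble(99)
    for i in range(n_clusters):
        cluster_i = make_mock_cluster(i)
        lowmem_ensemble.make_individual_radial_profile(
            galaxycluster=cluster_i,
            tan_component_in="DS_t", cross_component_in="DS_x",
            tan_component_out="DS_t", cross_component_out="DS_x",
            weights_in="w_ls", weights_out="W_l",
            bins=bins, bin_units="Mpc", cosmo=cosmo,
        )
        del cluster_i  # only the binned profile is kept in the ensemble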
A third option is where all clusters already have the profile computed,
and we just add those to the ``ClusterEnsemble`` object:

.. code:: ipython3

    # add profiles to gclist
    for cluster in gclist:
        cluster.make_radial_profile(
            tan_component_in="DS_t",
            cross_component_in="DS_x",
            tan_component_out="DS_t",
            cross_component_out="DS_x",
            weights_in="w_ls",
            weights_out="W_l",
            bins=bins,
            bin_units="Mpc",
            cosmo=cosmo,
            use_weights=True,
        )

.. code:: ipython3

    cluster.profile

.. parsed-literal::

    GCData
    defined by: cosmo='CCLCosmology(H0=71.0, Omega_dm0=0.2202, Omega_b0=0.0448, Omega_k0=0.0)', bin_units='Mpc'
    with columns: radius_min, radius, radius_max, DS_t, DS_t_err, DS_x, DS_x_err, z, z_err, n_src, W_l
    9 objects (binned profile values not reproduced here)
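Since ``cluster.profile`` is a ``GCData`` table (built on an Astropy
``Table``), its columns can be accessed directly; a small sketch:

.. code:: ipython3

    # read off the binned profile of the last cluster processed in the loop above
    radius = cluster.profile["radius"]      # mean radius of sources in each bin [Mpc]
    ds_t = cluster.profile["DS_t"]          # tangential excess surface density
    ds_t_err = cluster.profile["DS_t_err"]  # associated error
    print(f"first bin: R = {radius[0]:.2f} Mpc, DS_t = {ds_t[0] / 1e13:.2f} x 1e13 Msun/Mpc^2")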
.. code:: ipython3

    ensemble_id3 = 3
    empty_cluster_ensemble = ClusterEnsemble(ensemble_id3)
    for cluster in gclist:
        empty_cluster_ensemble.add_individual_radial_profile(
            galaxycluster=cluster,
            profile_table=cluster.profile,
            tan_component="DS_t",
            cross_component="DS_x",
            weights="W_l",
        )

Stacked profile of the cluster ensemble
---------------------------------------

The stacked radial profile of the ensemble, i.e. the ``W_l``-weighted
average of the individual profiles,
:math:`\Delta\Sigma_{\rm stack}(R) = \sum_l W_l\,\Delta\Sigma_l(R) / \sum_l W_l`,
is then obtained as

.. code:: ipython3

    clusterensemble.make_stacked_radial_profile(tan_component="DS_t", cross_component="DS_x")

Covariance (Bootstrap, Sample, Jackknife) of the stack between radial bins
--------------------------------------------------------------------------

Radial bins may be correlated, and the ``ClusterEnsemble`` class provides
three methods to compute the covariance matrix of the stacked signal
from the data:

- The Sample covariance directly computes the covariance between radial
  bins of the ``N`` individual cluster profiles of the stack.
- The Bootstrap approach is a resampling technique generating
  ``n_bootstrap`` ensembles of ``N`` randomly drawn clusters from the
  original ensemble, allowing for duplication. For each new ensemble,
  the stacked profile is computed, and the covariance is computed over
  the ``n_bootstrap`` stacks.
- The Jackknife approach is another resampling technique that divides
  the sky into a given number of regions :math:`N_{\rm region}` and
  computes the covariance removing one region (i.e., the clusters of the
  ensemble in that region) at a time. The stack is then computed using
  the remaining clusters, and the covariance is computed over the
  :math:`N_{\rm region}` stacks. The division of the sky is done using
  the HEALPix pixelisation scheme and is controlled by the ``n_side``
  parameter, with :math:`N_{\rm region}=12 N_{\rm side}^2` (see the
  sketch below).

NB: approaches exist to compute the theoretical covariance of a stack,
but these are not (yet) available in ``CLMM``.
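To make the Jackknife region count concrete, here is a minimal sketch of
how the clusters map onto HEALPix regions. It assumes the ``healpy``
package, which is not imported by this notebook (``CLMM`` handles the
pixelisation internally):

.. code:: ipython3

    import healpy as hp

    n_side = 16
    n_region = 12 * n_side**2  # 3072 HEALPix pixels over the full sky
    # pixel index of each cluster, from its (ra, dec) in degrees
    cluster_pix = hp.ang2pix(n_side, ra, dec, lonlat=True)
    # only pixels that actually contain clusters contribute Jackknife resamplings
    print(n_region, len(np.unique(cluster_pix)))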
.. code:: ipython3

    clusterensemble.compute_sample_covariance(tan_component="DS_t", cross_component="DS_x")
    clusterensemble.compute_bootstrap_covariance(
        tan_component="DS_t", cross_component="DS_x", n_bootstrap=300
    )
    clusterensemble.compute_jackknife_covariance(
        n_side=16, tan_component="DS_t", cross_component="DS_x"
    )

.. code:: ipython3

    plt.figure(figsize=(7, 7))
    plt.rcParams["axes.linewidth"] = 2
    plt.plot(
        clusterensemble.data["radius"][0],
        clusterensemble.cov["tan_sc"].diagonal() ** 0.5 / 1e13,
        "--",
        c="royalblue",
        label="Sample",
        linewidth=3,
    )
    plt.plot(
        clusterensemble.data["radius"][0],
        clusterensemble.cov["tan_bs"].diagonal() ** 0.5 / 1e13,
        "-s",
        c="g",
        label="Bootstrap",
        linewidth=3,
        markersize=10,
    )
    plt.plot(
        clusterensemble.data["radius"][0],
        clusterensemble.cov["tan_jk"].diagonal() ** 0.5 / 1e13,
        c="r",
        label="Jackknife",
        linewidth=3,
    )
    plt.xlabel("R [Mpc]", fontsize=20)
    plt.ylabel(r"$\sigma_{\Delta\Sigma}\ (\times 10^{13} M_\odot /Mpc^2)$", fontsize=25)
    plt.tick_params(axis="both", which="major", labelsize=18)
    plt.legend(frameon=False, fontsize=20)
    plt.minorticks_on()
    plt.grid(lw=0.5)
    plt.grid(which="minor", lw=0.1)
    plt.loglog()

.. parsed-literal::

    []

.. image:: demo_mock_ensemble_files/demo_mock_ensemble_22_1.png

.. code:: ipython3

    fig, axes = plt.subplots(1, 3, figsize=(20, 5))
    plt.rcParams["axes.linewidth"] = 2
    fig.subplots_adjust(wspace=0.15, hspace=0)
    maximum = clusterensemble.cov["tan_sc"].max()
    for ax, cov, label in zip(
        axes,
        [clusterensemble.cov["tan_sc"], clusterensemble.cov["tan_bs"], clusterensemble.cov["tan_jk"]],
        ["Stack : Sample", "Stack : Bootstrap", "Stack : Jackknife"],
    ):
        ax.set_title(label, fontsize=20)
        ax.set_xlabel("radial bin index", fontsize=18)
        ax.set_ylabel("radial bin index", fontsize=18)
        ax.tick_params(axis="both", which="major", labelsize=12)
        im = ax.imshow(cov, cmap="Reds", vmin=0, vmax=maximum, origin="lower")
        plt.colorbar(im, ax=ax)

.. image:: demo_mock_ensemble_files/demo_mock_ensemble_23_0.png

Visualizing the stacked profiles and corresponding model
--------------------------------------------------------

In the figure below, we plot:

- the individual :math:`\Delta\Sigma` profiles of the clusters (light blue)
- the stacked signal (red symbols)
- the prediction computed using an NFW profile and the mean values of
  the mass, concentration and redshift in the stack (dashed black)

.. code:: ipython3

    moo = clmm.Modeling(massdef="critical", delta_mdef=200, halo_profile_model="nfw")
    moo.set_cosmo(cosmo)
    # Average values of mass and concentration of the ensemble, used below to overplot the model on the stacked profile
    moo.set_concentration(concentration.mean())
    moo.set_mass(cluster_m.mean())

.. code:: ipython3

    r_stack, gt_stack, gx_stack = (clusterensemble.stacked_data[c] for c in ("radius", "DS_t", "DS_x"))

    plt.rcParams["axes.linewidth"] = 2
    fig, axs = plt.subplots(1, 2, figsize=(17, 6))

    err_gt = clusterensemble.cov["tan_sc"].diagonal() ** 0.5 / 1e13
    err_gx = clusterensemble.cov["cross_sc"].diagonal() ** 0.5 / 1e13

    axs[0].errorbar(
        r_stack,
        gt_stack / 1e13,
        err_gt,
        markersize=5,
        c="r",
        fmt="o",
        capsize=10,
        elinewidth=1,
        zorder=1000,
        alpha=1,
        label="Stack",
    )
    axs[1].errorbar(
        r_stack,
        gx_stack / 1e13,
        err_gx,
        markersize=5,
        c="r",
        fmt="o",
        capsize=10,
        elinewidth=1,
        zorder=1000,
        alpha=1,
        label="Stack",
    )

    axs[0].plot(
        clusterensemble.data["radius"][0],
        moo.eval_excess_surface_density(clusterensemble.data["radius"][0], cluster_z.mean()) / 1e13,
        "--k",
        linewidth=3,
        label="Prediction from stack mean cluster",
        zorder=100,
    )
    axs[1].plot(
        clusterensemble.data["radius"][0],
        0 * moo.eval_excess_surface_density(clusterensemble.data["radius"][0], cluster_z.mean()) / 1e13,
        "--k",
        linewidth=3,
        label="y=0",
        zorder=100,
    )

    axs[0].set_xscale("log")
    axs[1].set_xscale("log")

    for i in range(n_clusters):
        axs[0].plot(
            clusterensemble.data["radius"][i],
            clusterensemble.data["DS_t"][i] / 1e13,
            "cyan",
            label="Individual",
            alpha=1,
            linewidth=1,
        )
        axs[1].plot(
            clusterensemble.data["radius"][i],
            clusterensemble.data["DS_x"][i] / 1e13,
            "cyan",
            label="Individual",
            alpha=1,
            linewidth=1,
        )
        if i == 0:
            axs[0].legend(frameon=False, fontsize=15)
            axs[1].legend(frameon=False, fontsize=15)

    axs[0].set_xlabel("R [Mpc]", fontsize=20)
    axs[1].set_xlabel("R [Mpc]", fontsize=20)
    axs[0].tick_params(axis="both", which="major", labelsize=18)
    axs[1].tick_params(axis="both", which="major", labelsize=18)
    axs[0].set_ylabel(r"$\Delta\Sigma_+$ $[\times 10^{13} M_\odot / Mpc^2]$", fontsize=20)
    axs[1].set_ylabel(r"$\Delta\Sigma_\times$ $[\times 10^{13} M_\odot / Mpc^2]$", fontsize=20)
    axs[0].set_title(r"Tangential", fontsize=20)
    axs[1].set_title(r"Cross", fontsize=20)

    for ax in axs:
        ax.minorticks_on()
        ax.grid(lw=0.5)
        ax.grid(which="minor", lw=0.1)
    plt.show()
.. image:: demo_mock_ensemble_files/demo_mock_ensemble_27_0.png

Saving/Loading ClusterEnsemble
------------------------------

The ``ClusterEnsemble`` object also has an option for saving/loading:

.. code:: ipython3

    clusterensemble.save("ce.pkl")

.. code:: ipython3

    clusterensemble2 = ClusterEnsemble.load("ce.pkl")
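As a quick usage check (a sketch, not from the original notebook), the
reloaded ensemble should carry the same stacked profile as the one that
was saved:

.. code:: ipython3

    # the stacked tangential profile should survive the save/load round trip
    assert np.allclose(
        clusterensemble.stacked_data["DS_t"], clusterensemble2.stacked_data["DS_t"]
    )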