Source code for txpipe.random_cats

from .base_stage import PipelineStage
from .data_types import MapsFile, YamlFile, RandomsCatalog, TomographyCatalog, HDFFile
from .utils import choose_pixelization, Splitter
import numpy as np


[docs]class TXRandomCat(PipelineStage): name='TXRandomCat' inputs = [ ('aux_lens_maps', MapsFile), ('lens_photoz_stack', HDFFile), ] outputs = [ ('random_cats', RandomsCatalog), ('binned_random_catalog', RandomsCatalog), ] config_options = { 'density': 100., # number per square arcmin at median depth depth. Not sure if this is right. 'Mstar': 23.0, # Schecther distribution Mstar parameter 'alpha': -1.25, # Schecther distribution alpha parameter } def run(self): import scipy.special import scipy.stats import healpy from . import randoms # Load the input depth map with self.open_input('aux_lens_maps', wrapper=True) as maps_file: depth = maps_file.read_map('depth/depth') info = maps_file.read_map_info('depth/depth') nside = info['nside'] scheme = choose_pixelization(**info) pz_stack = self.open_input('lens_photoz_stack') # Cut down to pixels that have any objects in pixel = np.where(depth > 0)[0] depth = depth[pixel] npix = depth.size if len(pixel)==1: raise ValueError("Only one pixel in depth map!") ### This will likely need to be changed with the density Schechter function and density ### being calculated individually for each redshift bin. When it comes time to update the code ### to do that, move the lines below (down to the multi hash line) into the Ntomo loop # Read configuration values Mstar = self.config['Mstar'] alpha15 = 1.5 + self.config['alpha'] density_at_median = self.config['density'] # Work out the normalization of a Schechter distribution # with the given median depth median_depth = np.median(depth) x_med = 10.**(0.4*(Mstar-median_depth)) phi_star = density_at_median / scipy.special.gammaincc(alpha15, x_med) # Work out the number density in each pixel based on the # given Schecter distribution x = 10.**(0.4*(Mstar-depth)) density = phi_star * scipy.special.gammaincc(alpha15, x) # Pixel geometry - area in arcmin^2 pix_area = scheme.pixel_area(degrees=True) * 60. * 60. vertices = scheme.vertices(pixel) ################################################################################## ### I think this is redundant if your pz_stack file has separated lens bins ### The current file in 'pz_stack' only has 1 lens bin, but zbins loads 4 bins!! Ntomo = len(pz_stack['n_of_z']['lens'].keys())-1 z_photo_arr = pz_stack['n_of_z']['lens']['z'][:] ### Loop over the tomographic bins to find number of galaxies in each pixel/zbin ### When the density changes per redshift bin, this can go into the main Ntomo loop numbers = np.zeros((Ntomo, npix), dtype=int) if self.rank == 0: for j in range(Ntomo): # Poisson distribution about mean numbers[j] = scipy.stats.poisson.rvs(density * pix_area, 1) # give all processors the same values if self.comm is not None: self.comm.Bcast(numbers) # total number of objects in each bin bin_counts = numbers.sum(axis=1) # start index of the total number in the global bin bin_starts = np.concatenate([[0], np.cumsum(bin_counts)])[:-1] # The starting index of each pixel in the array, so we can parallelize pix_starts = np.zeros((Ntomo, npix), dtype=int) for j in range(Ntomo): pix_starts[j] = np.concatenate([[0], np.cumsum(numbers[j])])[:-1] ### Get total number of randoms in all zbins ### Once the density gets updated per redshift bin, the output file will need to ### combine all the tomographic bins in a bit more clever/convenient way than currently n_total = numbers.sum() if self.rank == 0: print(f"Generating {n_total} randoms") for j in range(Ntomo): print(f" - {bin_counts[j]} in bin {j}") # First output is the all of the output_file = self.open_output('random_cats', parallel=True) group = output_file.create_group('randoms') ra_out = group.create_dataset('ra', (n_total,), dtype=np.float64) dec_out = group.create_dataset('dec', (n_total,), dtype=np.float64) z_out = group.create_dataset('z', (n_total,), dtype=np.float64) bin_out = group.create_dataset('bin', (n_total,), dtype=np.int16) # Second output is specific to an individual bin, so we can just load # a single bin as needed binned_output = self.open_output("binned_random_catalog", parallel=True) binned_group = binned_output.create_group("randoms") subgroups = [] binned_group.attrs['nbin'] = Ntomo for i in range(Ntomo): g = binned_group.create_group(f"bin_{i}") g.create_dataset('ra', (bin_counts[i],)) g.create_dataset('dec', (bin_counts[i],)) g.create_dataset('z', (bin_counts[i],)) subgroups.append(g) pixels_per_proc = npix // self.size for j in range(Ntomo): ### Load pdf of ith lens redshift bin pz n_hist = pz_stack[f'n_of_z/lens/bin_{j}'][:] ### Make cdf and normalise z_cdf = np.cumsum(n_hist) z_cdf_norm = z_cdf / np.float(max(z_cdf)) subgroup = subgroups[j] # Generate the random points in each pixel ndone = 0 for i, (vertices_i) in self.split_tasks_by_rank(enumerate(vertices)): if (ndone % 1000 == 0): print( f"Rank {self.rank} done {ndone:,} of its {pixels_per_proc:,} pixels for bin {j}" ) # Use the pixel vertices to generate the points ### This likely wont work for curved sky maps since healpy pixels aren't ### fully quadrilateral... not sure how big of a difference (if any) this ### will make N = numbers[j, i] p1, p2, p3, p4 = vertices_i.T P = randoms.random_points_in_quadrilateral(p1, p2, p3, p4, N) # Convert to RA/Dec # This is not healpy-dependent so we just use it as a convenience function ra, dec = healpy.vec2ang(P, lonlat=True) bin_index = np.repeat(j, N) ### Create random values [0,1] equal to the number of galaxies per pixel cdf_rand_val = np.random.uniform(0,1.0,N) ### Interpolate those random values to a redshift value given by the cdf # z_photo_rand = np.interp(cdf_rand_val,z_cdf_norm,z_photo_arr) z_interp_func = scipy.interpolate.interp1d(z_cdf_norm,z_photo_arr) # Sometimes we don't quite go down to z - deal with that cdf_rand_val = cdf_rand_val.clip(z_cdf_norm.min(), z_cdf_norm.max()) z_photo_rand = z_interp_func(cdf_rand_val) # Save output to the generic non-binned output index = bin_starts[j] + pix_starts[j, i] ra_out[index:index+N] = ra dec_out[index:index+N] = dec z_out[index:index+N] = z_photo_rand bin_out[index:index+N] = bin_index # Save to the bit that is specific to this bin index = pix_starts[j, i] subgroup['ra'][index:index+N] = ra subgroup['dec'][index:index+N] = dec subgroup['z'][index:index+N] = z_photo_rand ndone += 1 if self.comm is not None: self.comm.Barrier() output_file.close() binned_output.close()
if __name__ == '__main__': PipelineStage.main()