Using the LSST Stack tools to do positional matching on coadd and src catalogs

Using the LSST Stack tools to do positional matching on coadd and src catalogs#

Owner: Jim Chiang (@jchiang87)
Updated by: Javier Sánchez (@fjaviersanchez)
Last Run: 2020-06-12

In this notebook, we use the data butler to retrieve catalogs from coadd and visit-level analyses of Run2.2i, and use the lsst.afw.table.matchRaDec function to do positional matching against galaxy truth info extracted from the cosmoDC2 v1.1.4 extragalactic catalog. To enable this, we show how to create a SourceCatalog object from the galaxy truth info provided by the GCR interface.

Set Up#

import warnings
from collections import namedtuple
import numpy as np
import matplotlib.pyplot as plt
import lsst.afw.geom as afw_geom
import lsst.afw.table as afw_table
import lsst.daf.persistence as dp
import GCRCatalogs
import healpy as hp
import lsst.geom

How To Create a Source Catalog#

In order to use the Stack’s spatial matching code, we will need to reformat the extragalactic catalog galaxy position and magnitude information into an afw SourceCatalog object. These table objects are initialized by a “schema”, which in turn is built from column definition “Coldef” objects. The mag_cols function below shows how these Coldefs can be created.

def make_SourceCatalog(new_cols):
    """
    Make a SourceCatalog to contain id and coordinates for each object, plus any new
    columns.
    
    Parameters
    ----------
    new_cols: list of Coldefs
        Column info for adding to an afw.table schema.

    Returns
    -------
    lsst.afw.table.SourceCatalog: An empty SourceCatalog with the desired schema.
    """
    # The minimal schema just contains the `id`, `coord_ra`, and `coord_dec` fields.
    schema = afw_table.SourceTable.makeMinimalSchema()
    for coldef in new_cols:
        schema.addField(coldef.name, type=coldef.type, doc=coldef.doc)
    return afw_table.SourceCatalog(schema)


def mag_cols(bands):
    """Return column information for adding magnitude columns to an afw.table schema."""
    Coldef = namedtuple('Coldef', 'name type doc'.split())
    return [Coldef(f'mag_{x}', float, f'{x}-magnitude')
            for x in bands]

Selecting Galaxies#

We are going to downselect cosmoDC2 to the sky region being considered. We’ll make a general-purpose RegionSelector class, and then sub-class it for selecting objects in either CCDs or patches.

class RegionSelector:
    """
    Class to rotate the protoDC2 galaxies to the Run1.1p sky location and downselect those galaxies
    based on a magnitude limit and on the coordinates of the subregion (i.e., patch or CCD) being
    considered.
    """

    def __init__(self):
        pass
    
    def _set_coord_range(self, bbox, wcs):
        """
        Set the coordinate range of the region.
        
        Notes
        -----
        This method is used by the RegionSelector's subclasses.
        
        Parameters
        ----------
        bbox: Calexp.BBox
            Defines corners of region's bounding box
        wcs: Calexp.Wcs
            Defines pixel to world (sky) coordinate transformation
        """
        region_box = lsst.geom.Box2D(bbox)
        corners = region_box.getCorners()
        ra_values, dec_values = [], []
        for corner in corners:
            ra, dec = wcs.pixelToSky(corner)
            ra_values.append(ra.asDegrees())
            dec_values.append(dec.asDegrees())
        self.ra_range = min(ra_values), max(ra_values)
        self.dec_range = min(dec_values), max(dec_values)

    def __call__(self, gc, band, max_mag):
        """
        Create a SourceCatalog object from the input galaxy catalog for the specified band, and
        apply the region and magnitude cuts.

        Parameters
        ----------
        gc: GCRCatalogs GalaxyCatalog
            The galaxy catalog obtained via GCR.
        band: str
            The band, e.g., 'i', to use for the magnitude comparison with the values measured
            from the simulated data.
        max_mag: float
            The magnitude limit to apply.

        Returns
        -------
        lsst.afw.table.SourceCatalog
        """
        # Retrieve the healpix pixels corresponding to the catalog so we don't query the full catalog
        vertices = hp.ang2vec(np.array([self.ra_range[0], self.ra_range[1],
                                        self.ra_range[1], self.ra_range[0]]),
                              np.array([self.dec_range[0], self.dec_range[0],
                                        self.dec_range[1], self.dec_range[1]]), lonlat=True)
        ipix = hp.query_polygon(32, vertices, inclusive=True)
        # We are going to pass the healpixels that overlap with our catalog as native filters to speed up the process
        native_filter = f'(healpix_pixel == {ipix[0]})'
        for ipx in ipix:
            native_filter=native_filter+f' | (healpix_pixel == {ipx})'
        # Retrieve the desired columns and cut on the magnitude values.
        bandname = f'mag_{band}'
        filter_ = f'{bandname} < {max_mag}'
        print("Applying magnitude filter:", filter_)
        gc_cols = gc.get_quantities(['galaxy_id', 'ra', 'dec',
                                     bandname], filters=[filter_, 
                                                         f'ra > {self.ra_range[0]}',
                                                         f'ra < {self.ra_range[1]}',
                                                         f'dec > {self.dec_range[0]}',
                                                         f'dec < {self.dec_range[1]}',
                                                        ],
                                    native_filters = native_filter)
        print("Number of galaxies within region:", len(gc_cols['ra']))

        # Create a SourceCatalog with the galaxy_ids, coordinates, magnitudes
        galaxy_catalog = make_SourceCatalog(mag_cols((band,)))
        for id_, ra, dec, mag in zip(gc_cols['galaxy_id'], gc_cols['ra'], gc_cols['dec'], gc_cols[bandname]):
            record = galaxy_catalog.addNew()
            record.set('id', id_)
            record.set('coord_ra', lsst.geom.Angle(ra, lsst.geom.degrees))
            record.set('coord_dec', lsst.geom.Angle(dec, lsst.geom.degrees))
            record.set(f'mag_{band}', mag)
        return galaxy_catalog

class CcdSelector(RegionSelector):
    """RegionSelector to use with visit-level calexps, i.e., single CCD exposures."""
    def __init__(self, butler, visit, raft, sensor):
        super(CcdSelector, self).__init__()
        # Get the CCD boundaries
        dataId = dict(visit=visit, raft=raft, sensor=sensor)
        calexp = butler.get('calexp', dataId=dataId)
        self._set_coord_range(calexp.getBBox(), calexp.getWcs())


class PatchSelector(RegionSelector):
    """RegionSelector to use with skyMap patches, i.e., coadd data."""
    def __init__(self, butler, tract, patch):
        super(PatchSelector, self).__init__()
        # Get the patch boundaries.
        skymap = butler.get('deepCoadd_skyMap')
        tractInfo = skymap[tract]
        patchInfo = tractInfo.getPatchInfo(eval(patch))
        self._set_coord_range(patchInfo.getOuterBBox(), tractInfo.getWcs())

Matching Experiments#

Now we have the tools we need, let’s read in the Run 2.2i-DR3 coadd catalog data and match it to the extragalactic catalog input.

# Create a data butler for the repo.
repo = '/global/cfs/cdirs/lsst/production/DC2_ImSim/Run2.2i/desc_dm_drp/v19.0.0-v1/rerun/run2.2i-coadd-wfd-dr3-v1'
butler = dp.Butler(repo)

Sources or Objects?#

We can consider visit-level src catalog data, in which case we would provide a dataId to the butler with (visit, raft, sensor) ids; or we can consider coadd object data, for which we would provid a dataId with (filter, tract, patch) ids.

Somewhat different flux models are available in the Run2.2i data for src catalogs versus coadd catalogs.

mag_max = 24.5
# we_are_matching = "Sources"
we_are_matching = "Objects"

if we_are_matching == "Sources":
    # Get the src catalog for a selected visit, raft, and sensor:
    visit = 219976
    raft = '2,2'
    sensor = '1,1'
    title = f'Run2.2i-DR3, visit={visit}, raft={raft}, sensor={sensor}'
    dataId = dict(visit=visit, raft=raft, sensor=sensor)
    catalog = butler.get('src', dataId=dataId)
    calexp = butler.get('calexp', dataId=dataId)
    filter_ = calexp.getInfo().getFilter().getFilter()
    calib = calexp.getPhotoCalib()
    #flux_model = 'ext_photometryKron_KronFlux'
    flux_model = 'modelfit_CModel'
    region_selector = CcdSelector(butler, visit, raft, sensor)

else:
    # Get the coadd catalog for a selected filter, tract, and patch:
    filter_ = 'r'
    tract = 4638
    patch = '2,2'
    title = f'Run2.2i-DR3, filter={filter_}, tract={tract}, patch={patch}'
    dataId = dict(tract=tract, patch=patch, filter=filter_)
    catalog = butler.get('deepCoadd_meas', dataId=dataId)
    calexp = butler.get('deepCoadd', dataId=dataId)
    calib = calexp.getPhotoCalib()
    flux_model = 'modelfit_CModel'
    region_selector = PatchSelector(butler, tract, patch)

Focusing on Well-measured Galaxies#

Galaxies can be selected as extended objects (or sources) using the base_ClassificationExtendedness_value. We use the model flag and flux to ensure that a flux value could be measured, and then apply a selection to ensure that we get deblended objects. Finally, we apply a relatively bright magnitude cut, to avoid confusion when performing the positional match.

# Extract columns to use in the selection:
ext = catalog.get('base_ClassificationExtendedness_value')
model_flag = catalog.get(flux_model + '_flag')
model_flux = catalog.get(flux_model + '_instFlux')
num_children = catalog.get('deblend_nChild')

# Apply the extendedness, flag, and blendedness cuts using the subset method:
cat_temp = catalog.subset((ext == 1) &
                          (model_flag == False) &
                          (model_flux > 0) &
                          (num_children == 0))
# Extract the magnitude and again use subset to apply the depth cut:
_mag = calib.instFluxToMagnitude(cat_temp, flux_model) 
mag = _mag[:,0] #Magnitude
magerr = _mag[:,1] #Magnitude Error
#print(mag, len(mag))
cat_temp = cat_temp.subset(mag < mag_max)

# Repackage everything in a more minimal SourceCatalog and add a magnitude column for comparing
# to the galaxy catalog truth values.
drp_catalog = make_SourceCatalog(mag_cols((filter_,)))
for record in cat_temp:
    new_rec = drp_catalog.addNew()
    for name in 'id coord_ra coord_dec parent'.split():
        new_rec.set(name, record[name])
    mag = calib.instFluxToMagnitude(record, flux_model)
    new_rec.set(f'mag_{filter_}', mag.value)
                
print("Number of observed objects in our DRP galaxy catalog", len(drp_catalog))

Extracting the Truth Info#

We can now use our region_selector object to process the protoDC2 extragalactic catalog. Note that while we instantiated it with a butler, so that it could work on selecting galaxy observations from either a tract or a CCD, we can call it as a function (via its __call__ method) which takes a GCR catalog object as its first argument. The result will be a DM Stack SourceCatalog object, that we can match to our observed DRP catalog.

# Read in the galaxy catalog data.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore')
    gc = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_image')

# Create a SourceCatalog from the gc data, applying the region and magnitude selections.
galaxy_catalog = region_selector(gc, band=filter_, max_mag=mag_max)

Compare the number of galaxies within our region with th enumber of observed galaxies in the DRP catalog. Is this what you would expect?

Doing the Matching#

We can now carry out the spatial matching, and compute some quantities to plot.

# Find positional matches within 100 milliarcseconds:
radius = lsst.geom.Angle(0.1, lsst.geom.arcseconds)
matches = afw_table.matchRaDec(drp_catalog, galaxy_catalog, radius)

matches is a list of match objects, each one containing an observed-true matchd galaxy pair. The code below shows how to work with these, looping over the matches and extracting information to plot.

# Compare magnitudes for matched objects:
drp_mag = np.zeros(len(matches), dtype=np.float)
gc_mag = np.zeros(len(matches), dtype=np.float)
sep = np.zeros(len(matches), dtype=np.float)
# Arrays for a quiver plot.
u = np.zeros(len(matches), dtype=np.float)
v = np.zeros(len(matches), dtype=np.float)
for i, match in enumerate(matches):
    drp_mag[i] = match.first[f'mag_{filter_}']
    gc_mag[i] = match.second[f'mag_{filter_}']
    sep[i] = np.degrees(match.distance)*3600.*1000.
    u[i] = match.first['coord_ra'] - match.second['coord_ra']
    v[i] = match.first['coord_dec'] - match.second['coord_dec']
print("Number of matches:", len(matches))

# Start a 2x2 panel figure:
fig = plt.figure(figsize=(8, 8))
frame_axes = fig.add_subplot(111, frameon=False)
frame_axes.set_title(title)
frame_axes.get_xaxis().set_ticks([])
frame_axes.get_yaxis().set_ticks([])

delta_mag = drp_mag - gc_mag  # Observed - True

# Upper Left: Histogram of match separations.
fig.add_subplot(2, 2, 1)
plt.hist(sep, range=(0, 100), histtype='step', bins=40)
plt.xlabel('separation (marcsec)')
plt.ylabel('entries / bin')

# Upper Right: Quiver plot of (DRP - galaxy_catalog) positions on the sky.
fig.add_subplot(2, 2, 2)
plt.quiver(np.degrees(drp_catalog['coord_ra']),
           np.degrees(drp_catalog['coord_dec']),
           u, v)
plt.xlabel('RA (deg)')
plt.ylabel('Dec (deg)')

# Lower left: Difference in magnitudes vs true magnitude (mag_gc).
fig.add_subplot(2, 2, 3)
plt.errorbar(gc_mag, delta_mag, fmt='.', alpha=0.1)
plt.xlabel(f'True mag {filter_}_gc'.format(filter_))
plt.ylabel(f'Mag difference ({filter_}_gc - {filter_}_drp)')

# Difference in magnitudes vs separation.
fig.add_subplot(2, 2, 4)
plt.errorbar(sep, delta_mag, fmt='.', alpha=0.1)
plt.xlabel('separation (mas)')
plt.ylabel(f'Mag difference ({filter_}_gc - {filter_}_drp)')

plt.tight_layout()

Going Further#

The available columns in a SourceCatalog can be seen by printing the info from the schema that it carries around with it. The cells below show you what you have available. The drp_catalog and galaxy_catalog that we made to do the spatial matching only have positions and magnitudes in them - but the parent catalogs have many more quantities.

for item in drp_catalog.schema:
    print(f"{item.field.getName()}:  {item.field.getDoc()}")

for item in galaxy_catalog.schema:
    print(f"{item.field.getName()}:  {item.field.getDoc()}")

# All the DRP measurements:
for item in catalog.schema:
    print(f"{item.field.getName()}:  {item.field.getDoc()}")

# All the cosmoDC2 parameters:
# help(gc)
gc.list_all_quantities()