Source code for txpipe.psf_diagnostics

from .base_stage import PipelineStage
from .data_types import Directory, ShearCatalog, HDFFile, PNGFile, TomographyCatalog, RandomsCatalog, YamlFile
from parallel_statistics import ParallelHistogram, ParallelMeanVariance
import numpy as np
from .utils.calibration_tools import read_shear_catalog_type
from .utils.calibration_tools import apply_metacal_response, apply_lensfit_calibration
from .plotting import manual_step_histogram

STAR_PSF_USED = 0
STAR_PSF_RESERVED = 1
STAR_TYPES = [STAR_PSF_USED, STAR_PSF_RESERVED]
STAR_TYPE_NAMES = ['PSF-used', 'PSF-reserved']
STAR_COLORS = ['blue', 'orange', 'green']

[docs]class TXPSFDiagnostics(PipelineStage):
    """
    """
    name='TXPSFDiagnostics'

    inputs = [
        ('star_catalog', HDFFile)
    ]
    outputs = [
        ('e1_psf_residual_hist', PNGFile),
        ('e2_psf_residual_hist', PNGFile),
        ('T_frac_psf_residual_hist', PNGFile),
        ('star_psf_stats', YamlFile),

    ]
    config_options = {}

    def run(self):
        # PSF tests
        import matplotlib
        matplotlib.use('agg')

        # Make plotters - in each case we supply the function to make
        # the thing we want to histogram, the name, label, and range
        plotters = [
            self.plot_histogram(
                lambda d: (d['measured_e1'] - d['model_e1']),
                'e1_psf_residual',
                '$e_{1}-e_{1,psf}$',
                np.linspace(-0.1, 0.1, 51),
                ),

            self.plot_histogram(
                lambda d: (d['measured_e2'] - d['model_e2']),
                'e2_psf_residual',
                '$e_{2}-e_{2,psf}$',
                np.linspace(-0.1, 0.1, 51),
                ),

            self.plot_histogram(
                lambda d: (d['measured_T'] - d['model_T']) / d['measured_T'],
                'T_frac_psf_residual',
                '$(T-T{psf})/T{psf}$',
                np.linspace(-0.1, 0.1, 51),
                ),

        ]

        # Start off each of the plotters.  This will make them all run up to the
        # first yield statement, then pause and wait for the first chunk of data
        for plotter in plotters:
            print(plotter)
            plotter.send(None)

        # Create an iterator for reading through the input data.
        # This method automatically splits up data among the processes,
        # so the plotters should handle this.
        chunk_rows = 10000
        star_cols = ['measured_e1',
                     'model_e1',
                     'measured_e2',
                     'model_e2',
                     'measured_T',
                     'model_T',
                     'calib_psf_used',
                     'calib_psf_reserved',
                     ]
        iter_star = self.iterate_hdf('star_catalog','stars',star_cols,chunk_rows)

        # Now loop through each chunk of input data, one at a time.
        # Each time we get a new segment of data, which goes to all the plotters
        for (start, end, data), in zip(iter_star):
            print(f"Read data {start} - {end}")
            data['star_type'] = load_star_type(data)
            # This causes each data = yield statement in each plotter to
            # be given this data chunk as the variable data.
            #data.update(data2)
            #data.update(data3)
            for plotter in plotters:
                plotter.send(data)

        # Tell all the plotters to finish, collect together results from the different
        # processors, and make their final plots.  Plotters need to respond
        # to the None input and
        results = {}
        for plotter in plotters:
            try:
                plotter.send(None)
            except StopIteration as err:
                # This feels like some cruel abuse because I don't
                # really understand how co-routines should be used.
                results.update(err.value)

        # save all counts, means, and sigmas to yaml file
        with self.open_output("star_psf_stats", wrapper=True) as f:
            f.write(results)

    def plot_histogram(self, function, output_name, xlabel, edges):
        import matplotlib.pyplot as plt
        print(f"Plotting {output_name}")
        counters = {s:ParallelHistogram(edges) for s in STAR_TYPES}
        stats = ParallelMeanVariance(len(STAR_TYPES))

        while True:
            data = yield

            if data is None:
                break

            # Get the specific quantity to plot
            # from the full data set
            value = function(data)

            for s in STAR_TYPES:
                r = data['star_type'] == s
                # build up histogram
                d = value[r]
                counters[s].add_data(d)
                # Also record data for mean and variance calculation
                # There are some NaNs in here which are presumably
                # not propagated to PSF estimation anyway
                stats.add_data(s, d[np.isfinite(d)])

        counts = {}
        for s in STAR_TYPES:
            counts[s] = counters[s].collect(self.comm)

        fig = self.open_output(output_name + "_hist", wrapper=True, figsize=(6,15))
        for s in STAR_TYPES:
            plt.subplot(len(STAR_TYPES), 1, s+1)
            manual_step_histogram(edges,
                                  counts[s],
                                  color=STAR_COLORS[s],
                                  label=STAR_TYPE_NAMES[s]
            )
            plt.title(STAR_TYPE_NAMES[s])
            plt.xlabel(xlabel)
            plt.ylabel(r'$N_{stars}$')
        fig.close()

        n, mu, sigma2 = stats.collect(self.comm)
        results = {}
        for s in STAR_TYPES:
            name = STAR_TYPE_NAMES[s]
            results[f'{output_name}_{name}_n'] = int(n[s])
            results[f'{output_name}_{name}_mu'] =float(mu[s])
            results[f'{output_name}_{name}_std'] = float(sigma2[s]**0.5)

        return results


[docs]class TXRoweStatistics(PipelineStage):
    """
    People sometimes think that these statistics are called the Rho statistics,
    because we usually use that letter for them.  Not so.  They are named after
    the wonderfully incorrigible rogue Barney Rowe, now sadly lost to high finance,
    who presented the first two of them in MNRAS 404, 350 (2010).
    """

    name = 'TXRoweStatistics'

    inputs =[('star_catalog', HDFFile)]
    outputs = [
        ('rowe134', PNGFile),
        ('rowe25', PNGFile),
        ('rowe_stats', HDFFile),
    ]

    config_options = {
        'min_sep':0.5,
        'max_sep':250.0,
        'nbins':20,
        'bin_slop':0.01,
        'sep_units':'arcmin',
        'psf_size_units':'sigma',
    }

    def run(self):
        import treecorr
        import h5py
        import matplotlib
        matplotlib.use('agg')

        ra, dec, e_psf, de_psf, T_f, star_type = self.load_stars()

        rowe_stats = {}
        for t in STAR_TYPES:
            s = star_type == t
            rowe_stats[1, t] = self.compute_rowe(1, s, ra, dec, de_psf,       de_psf)
            rowe_stats[2, t] = self.compute_rowe(2, s, ra, dec, e_psf,        de_psf)
            rowe_stats[3, t] = self.compute_rowe(3, s, ra, dec, e_psf*T_f, e_psf*T_f)
            rowe_stats[4, t] = self.compute_rowe(4, s, ra, dec, de_psf,    e_psf*T_f)
            rowe_stats[5, t] = self.compute_rowe(5, s, ra, dec, e_psf,     e_psf*T_f)

        self.save_stats(rowe_stats)
        self.rowe_plots(rowe_stats)



    def load_stars(self):
        with self.open_input('star_catalog') as f:
            g = f['stars']
            ra = g['ra'][:]
            dec = g['dec'][:]
            e1 = g['measured_e1'][:]
            e2 = g['measured_e2'][:]
            de1 = e1 - g['model_e1'][:]
            de2 = e2 - g['model_e2'][:]
            if self.config['psf_size_units']=='T':
                T_frac = (g['measured_T'][:] - g['model_T'][:]) / g['measured_T'][:]
            elif self.config['psf_size_units']=='sigma':
                T_frac = (g['measured_T'][:]**2 - g['model_T'][:]**2) / g['measured_T'][:]**2

            e_psf = np.array((e1, e2))
            de_psf = np.array((de1, de2))

            star_type = load_star_type(g)

        return ra, dec, e_psf, de_psf, T_frac, star_type

    def compute_rowe(self, i, s, ra, dec, q1, q2):
        # select a subset of the stars
        ra = ra[s]
        dec = dec[s]
        q1 = q1[:, s]
        q2 = q2[:, s]
        n = len(ra)
        print(f"Computing Rowe statistic rho_{i} from {n} objects")
        import treecorr
        corr = treecorr.GGCorrelation(self.config)
        cat1 = treecorr.Catalog(ra=ra, dec=dec, g1=q1[0], g2=q1[1], ra_units='deg', dec_units='deg')
        cat2 = treecorr.Catalog(ra=ra, dec=dec, g1=q2[0], g2=q2[1], ra_units='deg', dec_units='deg')
        corr.process(cat1, cat2)
        return corr.meanr, corr.xip, corr.varxip**0.5

    def rowe_plots(self, rowe_stats):
        # First plot - stats 1,3,4
        import matplotlib.pyplot as plt
        import matplotlib.transforms as mtrans

        f = self.open_output('rowe134', wrapper=True, figsize=(10, 6*len(STAR_TYPES)))
        for s in STAR_TYPES:
            ax = plt.subplot(len(STAR_TYPES), 1, s+1)


            for j,i in enumerate([1,3,4]):
                theta, xi, err = rowe_stats[i, s]
                tr = mtrans.offset_copy(ax.transData, f.file, 0.05*(j-1), 0, units='inches')
                plt.errorbar(theta, abs(xi), err, fmt='.', label=rf'$\rho_{i}$', capsize=3, transform=tr)
            plt.bar(0.0,2e-05,width=5,align='edge',color='gray',alpha=0.2)
            plt.bar(5,1e-07,width=245,align='edge',color='gray',alpha=0.2)
            plt.xscale('log')
            plt.yscale('log')
            plt.xlabel(r"$\theta$")
            plt.ylabel(r"$\xi_+(\theta)$")
            plt.legend()
            plt.title(STAR_TYPE_NAMES[s])
        f.close()

        f = self.open_output('rowe25', wrapper=True, figsize=(10, 6*len(STAR_TYPES)))
        for s in STAR_TYPES:
            ax = plt.subplot(len(STAR_TYPES), 1, s+1)
            for j,i in enumerate([2,5]):
                theta, xi, err = rowe_stats[i, s]
                tr = mtrans.offset_copy(ax.transData, f.file, 0.05*j-0.025, 0, units='inches')
                plt.errorbar(theta, abs(xi), err, fmt='.', label=rf'$\rho_{i}$', capsize=3, transform=tr)
                plt.title(STAR_TYPE_NAMES[s])
                plt.bar(0.0,2e-05,width=5,align='edge',color='gray',alpha=0.2)
                plt.bar(5,1e-07,width=245,align='edge',color='gray',alpha=0.2)
                plt.xscale('log')
                plt.yscale('log')
                plt.xlabel(r"$\theta$")
                plt.ylabel(r"$\xi_+(\theta)$")
                plt.legend()
        f.close()

    def save_stats(self, rowe_stats):
        f = self.open_output('rowe_stats')
        g = f.create_group("rowe_statistics")
        for i in 1,2,3,4,5:
            for s in STAR_TYPES:
                theta, xi, err = rowe_stats[i, s]
                name = STAR_TYPE_NAMES[s]
                h = g.create_group(f'rowe_{i}_{name}')
                h.create_dataset('theta', data=theta)
                h.create_dataset('xi_plus', data=xi)
                h.create_dataset('xi_err', data=err)
        f.close()

[docs]class TXStarShearTests(PipelineStage):
    """
    Star cross galaxy and star autocorrelation.
    """

    name = 'TXGalaxyStarShear'

    inputs =[('shear_catalog', ShearCatalog),('star_catalog', HDFFile),
    ('shear_tomography_catalog', TomographyCatalog)]
    outputs = [
        ('star_shear_test', PNGFile),
        ('star_star_test', PNGFile),
        ('star_shear_stats', HDFFile),
    ]

    config_options = {
        'min_sep':0.5,
        'max_sep':250.0,
        'nbins':20,
        'bin_slop':0.1,
        'sep_units':'arcmin',
        'psf_size_units':'sigma',
        'shear_catalog_type':'metacal',
        'flip_g2': False,
    }

    def run(self):
        import treecorr
        import h5py
        import matplotlib
        matplotlib.use('agg')

        ra, dec, e_psf, de_psf, star_type = self.load_stars()
        ra_gal, dec_gal, g1, g2, weight = self.load_galaxies()

        #only use reserved stars for this statistics
        galaxy_star_stats = {}
        star_star_stats = {}
        for t in STAR_TYPES:
            s = star_type == t
            galaxy_star_stats[1, t] = self.compute_galaxy_star(ra, dec, e_psf, s, ra_gal, dec_gal, g1, g2, weight)
            galaxy_star_stats[2, t] = self.compute_galaxy_star(ra, dec, de_psf, s, ra_gal, dec_gal, g1, g2, weight)
            star_star_stats[1, t] = self.compute_star_star(ra, dec, e_psf, s, ra_gal, dec_gal, e_psf, weight)
            star_star_stats[2, t] = self.compute_star_star(ra, dec, de_psf, s, ra_gal, dec_gal, de_psf, weight)

        self.save_stats(galaxy_star_stats,star_star_stats)
        self.galaxy_star_plots(galaxy_star_stats)
        self.star_star_plots(star_star_stats)



    def load_stars(self):
        with self.open_input('star_catalog') as f:
            g = f['stars']
            ra = g['ra'][:]
            dec = g['dec'][:]
            e1 = g['measured_e1'][:]
            e2 = g['measured_e2'][:]
            de1 = e1 - g['model_e1'][:]
            de2 = e2 - g['model_e2'][:]

            e_psf = np.array((e1, e2))
            de_psf = np.array((de1, de2))

            star_type = load_star_type(g)

        return ra, dec, e_psf, de_psf, star_type

    def load_galaxies(self):

        # Columns we need from the shear catalog
        # TODO: not sure of an application where we would want to use true shear but can be added
        read_shear_catalog_type(self)

        # load tomography data
        f = self.open_input('shear_tomography_catalog')
        source_bin = f['tomography/source_bin'][:]
        mask = (source_bin!=-1) # Only use the sources that pass the fiducial cuts
        if self.config['shear_catalog_type']=='metacal':
            R_total_2d = f['metacal_response/R_total_2d'][:]

        f = self.open_input('shear_catalog')
        g = f['shear']
        ra = g['ra'][:][mask]
        dec = g['dec'][:][mask]
        if self.config['shear_catalog_type']=='metacal':
            g1 = g['mcal_g1'][:][mask]
            g2 = g['mcal_g2'][:][mask]
            weight = g['weight'][:][mask]
        else:
            g1 = g['g1'][:][mask]
            g2 = g['g2'][:][mask]
            weight = g['weight'][:][mask]
            sigma_e = g['sigma_e'][:][mask]
            m = g['m'][:][mask]

        if self.config['flip_g2']:
            g2 *= -1

        if self.config['shear_catalog_type']=='metacal':
            # We use S=0 here because we have already included it in R_total
            g1, g2 = apply_metacal_response(R_total_2d, 0.0, g1, g2)

        elif self.config['shear_catalog_type']=='lensfit':
            g1, g2, weight, _ = apply_lensfit_calibration(g1 = g1,g2 = g2,weight = weight,sigma_e = sigma_e, m = m)
        else:
            print('Shear calibration type not recognized.')

        return ra, dec, g1, g2, weight

    def compute_galaxy_star(self, ra, dec, q, s, ra_gal, dec_gal, g1, g2, weight):
        # select the star type
        ra = ra[s]
        dec = dec[s]
        q = q[:, s] # PSF quantity, either ellipticity or residual
        n = len(ra)
        i = len(ra_gal)
        print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies")
        import treecorr
        corr = treecorr.GGCorrelation(self.config)
        cat1 = treecorr.Catalog(ra=ra, dec=dec, g1=q[0], g2=q[1], ra_units='deg', dec_units='deg')
        cat2 = treecorr.Catalog(ra=ra_gal, dec=dec_gal, g1=g2, g2=g2, ra_units='deg', dec_units='deg', w=weight)
        corr.process(cat1, cat2)
        return corr.meanr, corr.xip, corr.varxip**0.5

    def compute_star_star(self, ra, dec, q1, s, ra_gal, dec_gal, q2, weight):
        # select the star type
        ra = ra[s]
        dec = dec[s]
        q1 = q1[:, s]
        q2 = q2[:, s]
        n = len(ra)
        i = len(ra_gal)
        print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies")
        import treecorr
        corr = treecorr.GGCorrelation(self.config)
        cat1 = treecorr.Catalog(ra=ra, dec=dec, g1=q1[0], g2=q1[1], ra_units='deg', dec_units='deg')
        cat2 = treecorr.Catalog(ra=ra, dec=dec, g1=q2[0], g2=q2[1], ra_units='deg', dec_units='deg')
        corr.process(cat1, cat2)
        return corr.meanr, corr.xip, corr.varxip**0.5

    def galaxy_star_plots(self, galaxy_star_stats):
        import matplotlib.pyplot as plt
        import matplotlib.transforms as mtrans

        f = self.open_output('star_shear_test', wrapper=True, figsize=(10, 6*len(STAR_TYPES)))
        TEST_TYPES = ['shear','residual']
        for s in STAR_TYPES:
            ax = plt.subplot(len(STAR_TYPES), 1, s+1)
            for j,i in enumerate([1,2]):
                theta, xi, err = galaxy_star_stats[i, s]
                tr = mtrans.offset_copy(ax.transData, f.file, 0.05*j-0.025, 0, units='inches')
                plt.errorbar(theta, abs(xi), err, fmt='.', label=f'galaxy cross star {TEST_TYPES[i-1]}', capsize=3, transform=tr)
                plt.title(STAR_TYPE_NAMES[s])
                plt.xscale('log')
                plt.yscale('log')
                plt.xlabel(r"$\theta$")
                plt.ylabel(r"$\xi_+(\theta)$")
                plt.legend()
        f.close()

    def star_star_plots(self, star_star_stats):
        import matplotlib.pyplot as plt
        import matplotlib.transforms as mtrans

        f = self.open_output('star_star_test', wrapper=True, figsize=(10, 6*len(STAR_TYPES)))
        TEST_TYPES = ['shear','residual']
        for s in STAR_TYPES:
            ax = plt.subplot(len(STAR_TYPES), 1, s+1)
            for j,i in enumerate([1,2]):
                theta, xi, err = star_star_stats[i, s]
                tr = mtrans.offset_copy(ax.transData, f.file, 0.05*j-0.025, 0, units='inches')
                plt.errorbar(theta, abs(xi), err, fmt='.', label=f'star cross star {TEST_TYPES[i-1]}', capsize=3, transform=tr)
                plt.title(STAR_TYPE_NAMES[s])
                plt.xscale('log')
                plt.yscale('log')
                plt.xlabel(r"$\theta$")
                plt.ylabel(r"$\xi_+(\theta)$")
                plt.legend()
        f.close()

    def save_stats(self, galaxy_star_stats, star_star_stats):

        f = self.open_output('star_shear_stats')
        g = f.create_group("star_cross_galaxy")
        for i in 1,2:
            for s in STAR_TYPES:
                theta, xi, err = galaxy_star_stats[i, s]
                name = STAR_TYPE_NAMES[s]
                h = g.create_group(f'star_cross_galaxy_{i}_{name}')
                h.create_dataset('theta', data=theta)
                h.create_dataset('xi_plus', data=xi)
                h.create_dataset('xi_err', data=err)

        g = f.create_group("star_cross_star")
        for i in 1,2:
            for s in STAR_TYPES:
                theta, xi, err = star_star_stats[i, s]
                name = STAR_TYPE_NAMES[s]
                h = g.create_group(f'star_cross_star_{i}_{name}')
                h.create_dataset('theta', data=theta)
                h.create_dataset('xi_plus', data=xi)
                h.create_dataset('xi_err', data=err)
        f.close()




[docs]class TXStarDensityTests(PipelineStage):
    """
    Star cross galaxy and star autocorrelation density stats.
    """

    name = 'TXGalaxyStarDensity'

    inputs =[('shear_catalog', ShearCatalog),('star_catalog', HDFFile),
    ('shear_tomography_catalog', TomographyCatalog),
    ('random_cats', RandomsCatalog)]
    outputs = [
        ('star_density_test', PNGFile),
        ('star_density_stats', HDFFile),
    ]

    config_options = {
        'min_sep':0.5,
        'max_sep':250.0,
        'nbins':20,
        'bin_slop':0.1,
        'sep_units':'arcmin',
        'psf_size_units':'sigma',
        'flip_g2': False,
    }

    def run(self):
        import treecorr
        import h5py
        import matplotlib
        matplotlib.use('agg')

        ra, dec,  star_type = self.load_stars()
        ra_gal, dec_gal = self.load_galaxies()
        ra_random, dec_random = self.load_randoms()

        galaxy_star_stats = {}
        for t in STAR_TYPES:
            s = star_type == t
            galaxy_star_stats[1, t] = self.compute_galaxy_star(ra, dec, s, ra_gal, dec_gal, ra_random, dec_random)
            galaxy_star_stats[2, t] = self.compute_star_star(ra, dec, s, ra_gal, dec_gal, ra_random, dec_random)

        self.save_stats(galaxy_star_stats)
        self.galaxy_star_plots(galaxy_star_stats)



    def load_stars(self):
        with self.open_input('star_catalog') as f:
            g = f['stars']
            ra = g['ra'][:]
            dec = g['dec'][:]

            star_type = load_star_type(g)

        return ra, dec, star_type

    def load_randoms(self):

        with self.open_input('random_cats') as f:
            group = f['randoms']
            ra_random = group['ra'][:]
            dec_random = group['dec'][:]
        return ra_random, dec_random

    def load_galaxies(self):

        # Columns we need from the shear catalog
        # TODO: not sure of an application where we would want to use true shear but can be added
        read_shear_catalog_type(self)

        # load tomography data
        with self.open_input('shear_tomography_catalog') as f:
            source_bin = f['tomography/source_bin'][:]
            mask = (source_bin!=-1) # Only use the sources that pass the fiducial cuts

        with self.open_input('shear_catalog') as f:
            g = f['shear']
            ra = g['ra'][:][mask]
            dec = g['dec'][:][mask]

        return ra, dec

    def compute_galaxy_star(self, ra, dec, s, ra_gal, dec_gal, ra_random, dec_random):
        # select the star type
        ra = ra[s]
        dec = dec[s]
        n = len(ra)
        i = len(ra_gal)
        print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies")

        import treecorr

        rancat  = treecorr.Catalog(
                ra=ra_random, dec=dec_random,
                ra_units='degree', dec_units='degree')

        cat1 = treecorr.Catalog(ra=ra, dec=dec, ra_units='deg', dec_units='deg')
        cat2 = treecorr.Catalog(ra=ra_gal, dec=dec_gal, ra_units='deg', dec_units='deg')

        nn = treecorr.NNCorrelation(self.config)
        rn = treecorr.NNCorrelation(self.config)
        nr = treecorr.NNCorrelation(self.config)
        rr = treecorr.NNCorrelation(self.config)

        nn.process(cat1,    cat2)
        nr.process(cat1,    rancat)
        rn.process(rancat, cat2)
        rr.process(rancat, rancat)

        nn.calculateXi(rr, dr=nr, rd=rn)
        return nn.meanr, nn.xi, nn.varxi**0.5

    def compute_star_star(self, ra, dec, s, ra_gal, dec_gal, ra_random, dec_random):
        # select the star type
        ra = ra[s]
        dec = dec[s]
        n = len(ra)
        i = len(ra_gal)
        print(f"Computing galaxy-cross-star statistic from {n} stars and {i} galaxies")

        import treecorr

        rancat  = treecorr.Catalog(
                ra=ra_random, dec=dec_random,
                ra_units='degree', dec_units='degree')

        cat1 = treecorr.Catalog(ra=ra, dec=dec, ra_units='deg', dec_units='deg')
        cat2 = treecorr.Catalog(ra=ra_gal, dec=dec_gal, ra_units='deg', dec_units='deg')

        nn = treecorr.NNCorrelation(self.config)
        rn = treecorr.NNCorrelation(self.config)
        nr = treecorr.NNCorrelation(self.config)
        rr = treecorr.NNCorrelation(self.config)

        nn.process(cat1,    cat1)
        nr.process(cat1,    rancat)
        rn.process(rancat, cat1)
        rr.process(rancat, rancat)

        nn.calculateXi(rr, dr=nr, rd=rn)
        return nn.meanr, nn.xi, nn.varxi**0.5

    def galaxy_star_plots(self, galaxy_star_stats):
        import matplotlib.pyplot as plt
        import matplotlib.transforms as mtrans

        f = self.open_output('star_density_test', wrapper=True, figsize=(10, 6*len(STAR_TYPES)))
        TEST_TYPES = ['star cross galaxy','star cross star']
        for s in STAR_TYPES:
            ax = plt.subplot(len(STAR_TYPES), 1, s+1)
            for j,i in enumerate([1,2]):
                theta, xi, err = galaxy_star_stats[i, s]
                tr = mtrans.offset_copy(ax.transData, f.file, 0.05*j-0.025, 0, units='inches')
                plt.errorbar(theta, abs(xi), err, fmt='.', label=f'{TEST_TYPES[i-1]} density stats', capsize=3, transform=tr)
                plt.title(STAR_TYPE_NAMES[s])
                plt.xscale('log')
                plt.yscale('log')
                plt.xlabel(r"$\theta$")
                plt.ylabel(r"$\xi_+(\theta)$")
                plt.legend()
        f.close()

    def save_stats(self, galaxy_star_stats):

        f = self.open_output('star_density_stats')
        g = f.create_group("star_density")
        for i in 1,2:
            for s in STAR_TYPES:
                theta, xi, err = galaxy_star_stats[i, s]
                name = STAR_TYPE_NAMES[s]
                h = g.create_group(f'star_density_{i}_{name}')
                h.create_dataset('theta', data=theta)
                h.create_dataset('xi_plus', data=xi)
                h.create_dataset('xi_err', data=err)
        f.close()

[docs]class TXBrighterFatterPlot(PipelineStage):
    name = 'TXBrighterFatterPlot'

    inputs =[('star_catalog', HDFFile)]

    outputs = [
        ('brighter_fatter_plot', PNGFile),
        ('brighter_fatter_data', HDFFile),
    ]

    config_options = {
        'band': 'r',
        'nbin': 20,
        'mmin': 18.5,
        'mmax': 23.5,
    }

    def run(self):
        import h5py
        import matplotlib
        matplotlib.use('agg')

        data = self.load_stars()
        results = {}
        for s in STAR_TYPES:
            w = data['star_type'] == s
            data_s = {k: v[w] for k, v in data.items()}
            results[s] = self.compute_binned_stats(data_s)

        self.save_stats(results)
        self.save_plots(results)

    def load_stars(self):
        with self.open_input('star_catalog') as f:
            g = f['stars']

            band = self.config['band']
            data = {}
            data['mag'] = g[f'mag_{band}'][:]
            data['delta_e1'] = g['measured_e1'][:] - g['model_e1'][:]
            data['delta_e2'] = g['measured_e2'][:] - g['model_e2'][:]
            data['delta_T'] = g['measured_T'][:] - g['model_T'][:]
            data['star_type'] = load_star_type(g)

        return data

    def compute_binned_stats(self, data):
        # Which band this corresponds to depends on the
        # configuration option chosen
        mag = data['mag']
        mmin = self.config['mmin']
        mmax = self.config['mmax']
        nbin = self.config['nbin']
        # bin edges in magnitude
        edges = np.linspace(mmin, mmax, nbin+1)
        index = np.digitize(mag, edges)

        # Space for all the output values to go in
        dT = np.zeros(nbin)
        errT = np.zeros(nbin)
        e1 = np.zeros(nbin)
        e2 = np.zeros(nbin)
        err1 = np.zeros(nbin)
        err2 = np.zeros(nbin)
        m = np.zeros(nbin)


        for i in range(nbin):
            # Select only objects where everything is finite, as well
            # as only thing in this tomographic bin
            w = np.where((index==i+1) & np.isfinite(data['delta_T']) & np.isfinite(data['delta_e1']) & np.isfinite(data['delta_e2']) )
            # x-value = mean mag
            m[i] = mag[w].mean()
            # y values
            dT_i = data['delta_T'][w]
            e1_i = data['delta_e1'][w]
            e2_i = data['delta_e2'][w]
            # Mean and error on mean of each of these quantities
            dT[i] = dT_i.mean()
            errT[i] = dT_i.std() / np.sqrt(dT_i.size)
            e1[i] = e1_i.mean()
            err1[i] = e1_i.std() / np.sqrt(e1_i.size)
            e2[i] = e2_i.mean()
            err2[i] = e2_i.std() / np.sqrt(e2_i.size)

        return [m, dT, errT, e1, err1, e2, err2]

    def save_plots(self, results):
        import matplotlib.pyplot as plt
        band = self.config['band']
        n = len(results)
        width = n * 6
        f = self.open_output('brighter_fatter_plot', wrapper=True, figsize=(width,8))
        for s, res in results.items():
            m, dT, errT, e1, err1, e2, err2 = res

            # Top plot - classic BF size plot, the size residual as a function of
            # magnitude
            ax = plt.subplot(2,n,2*s+1)
            plt.title(STAR_TYPE_NAMES[s])
            plt.errorbar(m, dT, errT, fmt='.')
            plt.xlabel(f"{band}-band magnitude")
            plt.ylabel(r"$T_\mathrm{PSF} - T_\mathrm{model}$ ($\mathrm{arcsec}^2$)")
            plt.ylim(-0.025, 0.1)
            # Lower plot - the e1 and e2 residuals as a function of mag
            plt.subplot(2,n,2*s+2, sharex=ax)
            plt.title(STAR_TYPE_NAMES[s])
            plt.errorbar(m, e1, err1, label='$e_1$', fmt='.')
            plt.errorbar(m, e2, err2, label='$e_2$', fmt='.')
            plt.ylabel(r"$e_\mathrm{PSF} - e_\mathrm{model}$")
            plt.xlabel(f"{band}-band magnitude")
            # May need to adjust this range
            plt.ylim(-0.02, 0.02)
        plt.legend()
        plt.tight_layout()
        f.close()

    def save_stats(self, results):
        # Save all the stats in results for later plotting
        # Save to standard HDF5 format.
        f = self.open_output('brighter_fatter_data')
        g1 = f.create_group('brighter_fatter')
        g1.attrs['band'] = self.config['band']
        for s, res in results.items():
            (m, dT, errT, e1, err1, e2, err2) = res
            g = g1.create_group(STAR_TYPE_NAMES[s])
            g.create_dataset('mag', data=m)
            g.create_dataset('delta_T', data=dT)
            g.create_dataset('delta_T_error', data=errT)
            g.create_dataset('delta_e1', data=e1)
            g.create_dataset('delta_e1_error', data=err1)
            g.create_dataset('delta_e2', data=e2)
            g.create_dataset('delta_e2_error', data=err2)
        f.close()



def load_star_type(data):
    used = data['calib_psf_used'][:]
    reserved = data['calib_psf_reserved'][:]

    star_type = np.zeros(used.size, dtype=int)
    star_type[used] = STAR_PSF_USED
    star_type[reserved] = STAR_PSF_RESERVED

    return star_type