| 
 | 1 | +#  | 
 | 2 | +# LSST Data Management System  | 
 | 3 | +#  | 
 | 4 | +# Copyright 2008-2017  AURA/LSST.  | 
 | 5 | +#  | 
 | 6 | +# This product includes software developed by the  | 
 | 7 | +# LSST Project (http://www.lsst.org/).  | 
 | 8 | +#  | 
 | 9 | +# This program is free software: you can redistribute it and/or modify  | 
 | 10 | +# it under the terms of the GNU General Public License as published by  | 
 | 11 | +# the Free Software Foundation, either version 3 of the License, or  | 
 | 12 | +# (at your option) any later version.  | 
 | 13 | +#  | 
 | 14 | +# This program is distributed in the hope that it will be useful,  | 
 | 15 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
 | 16 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  | 
 | 17 | +# GNU General Public License for more details.  | 
 | 18 | +#  | 
 | 19 | +# You should have received a copy of the LSST License Statement and  | 
 | 20 | +# the GNU General Public License along with this program.  If not,  | 
 | 21 | +# see <https://www.lsstcorp.org/LegalNotices/>.  | 
 | 22 | +#  | 
 | 23 | +from __future__ import absolute_import, division, print_function  | 
 | 24 | + | 
 | 25 | +from builtins import zip  | 
 | 26 | + | 
 | 27 | +import numpy as np  | 
 | 28 | + | 
 | 29 | +from lsst.pex.config import Config, Field  | 
 | 30 | +from lsst.pipe.base import Task, Struct  | 
 | 31 | + | 
 | 32 | +import lsst.afw.table  | 
 | 33 | + | 
 | 34 | +__all__ = ["ReserveSourcesConfig", "ReserveSourcesTask"]  | 
 | 35 | + | 
 | 36 | + | 
 | 37 | +class ReserveSourcesConfig(Config):  | 
 | 38 | +    """Configuration for reserving sources"""  | 
 | 39 | +    fraction = Field(dtype=float, default=0.0,  | 
 | 40 | +                     doc="Fraction of candidates to reserve from fitting; none if <= 0")  | 
 | 41 | +    seed = Field(dtype=int, default=1,  | 
 | 42 | +                 doc=("This number will be added to the exposure ID to set the random seed for "  | 
 | 43 | +                      "reserving candidates"))  | 
 | 44 | + | 
 | 45 | + | 
 | 46 | +class ReserveSourcesTask(Task):  | 
 | 47 | +    """Reserve sources from analysis  | 
 | 48 | +
  | 
 | 49 | +    We randomly select a fraction of sources that will be reserved  | 
 | 50 | +    from analysis. This allows evaluation of the quality of model fits  | 
 | 51 | +    using sources that were not involved in the fitting process.  | 
 | 52 | +
  | 
 | 53 | +    Constructor parameters  | 
 | 54 | +    ----------------------  | 
 | 55 | +    columnName : `str`, required  | 
 | 56 | +        Name of flag column to add; we will suffix this with "_reserved".  | 
 | 57 | +    schema : `lsst.afw.table.Schema`, required  | 
 | 58 | +        Catalog schema.  | 
 | 59 | +    doc : `str`  | 
 | 60 | +        Documentation for column to add.  | 
 | 61 | +    config : `ReserveSourcesConfig`  | 
 | 62 | +        Configuration.  | 
 | 63 | +    """  | 
 | 64 | +    ConfigClass = ReserveSourcesConfig  | 
 | 65 | +    _DefaultName = "reserveSources"  | 
 | 66 | + | 
 | 67 | +    def __init__(self, columnName=None, schema=None, doc=None, **kwargs):  | 
 | 68 | +        Task.__init__(self, **kwargs)  | 
 | 69 | +        assert columnName is not None, "columnName not provided"  | 
 | 70 | +        assert schema is not None, "schema not provided"  | 
 | 71 | +        self.columnName = columnName  | 
 | 72 | +        self.key = schema.addField(self.columnName + "_reserved", type="Flag", doc=doc)  | 
 | 73 | + | 
 | 74 | +    def run(self, sources, prior=None, expId=0):  | 
 | 75 | +        """Select sources to be reserved  | 
 | 76 | +
  | 
 | 77 | +        Reserved sources will be flagged in the catalog, and we will return  | 
 | 78 | +        boolean arrays that identify the sources to be reserved from and  | 
 | 79 | +        used in the analysis. Typically you'll want to use the sources  | 
 | 80 | +        from the `use` array in your fitting, and use the sources from the  | 
 | 81 | +        `reserved` array as an independent test of your fitting.  | 
 | 82 | +
  | 
 | 83 | +        Parameters  | 
 | 84 | +        ----------  | 
 | 85 | +        sources : `lsst.afw.table.Catalog` or `list` of `lsst.afw.table.Record`  | 
 | 86 | +            Sources from which to select some to be reserved.  | 
 | 87 | +        prior : `numpy.ndarray` of type `bool`, optional  | 
 | 88 | +            Prior selection of sources. Should have the same length as  | 
 | 89 | +            `sources`. If set, we will only consider for reservation sources  | 
 | 90 | +            that are flagged `True` in this array.  | 
 | 91 | +        expId : `int`  | 
 | 92 | +            Exposure identifier; used for seeding the random number generator.  | 
 | 93 | +
  | 
 | 94 | +        Return struct contents  | 
 | 95 | +        ----------------------  | 
 | 96 | +        reserved : `numpy.ndarray` of type `bool`  | 
 | 97 | +            Sources to be reserved are flagged `True` in this array.  | 
 | 98 | +        use : `numpy.ndarray` of type `bool`  | 
 | 99 | +            Sources the user should use in analysis are flagged `True`.  | 
 | 100 | +        """  | 
 | 101 | +        if prior is not None:  | 
 | 102 | +            assert len(prior) == len(sources), "Length mismatch: %s vs %s" % (len(prior), len(sources))  | 
 | 103 | +            numSources = prior.sum()  | 
 | 104 | +        else:  | 
 | 105 | +            numSources = len(sources)  | 
 | 106 | +        selection = self.select(numSources, expId)  | 
 | 107 | +        if prior is not None:  | 
 | 108 | +            selection = self.applySelectionPrior(prior, selection)  | 
 | 109 | +        self.markSources(sources, selection)  | 
 | 110 | +        self.log.info("Reserved %d/%d sources", selection.sum(), len(selection))  | 
 | 111 | +        return Struct(reserved=selection,  | 
 | 112 | +                      use=prior & ~selection if prior is not None else np.logical_not(selection))  | 
 | 113 | + | 
 | 114 | +    def select(self, numSources, expId=0):  | 
 | 115 | +        """Randomly select some sources  | 
 | 116 | +
  | 
 | 117 | +        We return a boolean array with a random selection. The fraction  | 
 | 118 | +        of sources selected is specified by the config parameter `fraction`.  | 
 | 119 | +
  | 
 | 120 | +        Parameters  | 
 | 121 | +        ----------  | 
 | 122 | +        numSources : `int`  | 
 | 123 | +            Number of sources in catalog from which to select.  | 
 | 124 | +        expId : `int`  | 
 | 125 | +            Exposure identifier; used for seeding the random number generator.  | 
 | 126 | +
  | 
 | 127 | +        Returns  | 
 | 128 | +        -------  | 
 | 129 | +        selection : `numpy.ndarray` of type `bool`  | 
 | 130 | +            Selected sources are flagged `True` in this array.  | 
 | 131 | +        """  | 
 | 132 | +        selection = np.zeros(numSources, dtype=bool)  | 
 | 133 | +        if self.config.fraction <= 0:  | 
 | 134 | +            return selection  | 
 | 135 | +        reserve = int(np.round(numSources*self.config.fraction))  | 
 | 136 | +        selection[:reserve] = True  | 
 | 137 | +        rng = np.random.RandomState(self.config.seed + expId)  | 
 | 138 | +        rng.shuffle(selection)  | 
 | 139 | +        return selection  | 
 | 140 | + | 
 | 141 | +    def applySelectionPrior(self, prior, selection):  | 
 | 142 | +        """Apply selection to full catalog  | 
 | 143 | +
  | 
 | 144 | +        The `select` method makes a random selection of sources. If those  | 
 | 145 | +        sources don't represent the full population (because a sub-selection  | 
 | 146 | +        has already been made), then we need to generate a selection covering  | 
 | 147 | +        the entire population.  | 
 | 148 | +
  | 
 | 149 | +        Parameters  | 
 | 150 | +        ----------  | 
 | 151 | +        prior : `numpy.ndarray` of type `bool`  | 
 | 152 | +            Prior selection of sources, identifying the subset from which the  | 
 | 153 | +            random selection has been made.  | 
 | 154 | +        selection : `numpy.ndarray` of type `bool`  | 
 | 155 | +            Selection of sources in subset identified by `prior`.  | 
 | 156 | +
  | 
 | 157 | +        Returns  | 
 | 158 | +        -------  | 
 | 159 | +        full : `numpy.ndarray` of type `bool`  | 
 | 160 | +            Selection applied to full population.  | 
 | 161 | +        """  | 
 | 162 | +        full = np.zeros(len(prior), dtype=bool)  | 
 | 163 | +        full[prior] = selection  | 
 | 164 | +        return full  | 
 | 165 | + | 
 | 166 | +    def markSources(self, sources, selection):  | 
 | 167 | +        """Mark sources in a list or catalog  | 
 | 168 | +
  | 
 | 169 | +        This requires iterating through the list and setting the flag in  | 
 | 170 | +        each source individually. Even if the `sources` is a `Catalog`  | 
 | 171 | +        with contiguous records, it's not currently possible to set a boolean  | 
 | 172 | +        column (DM-6981) so we need to iterate.  | 
 | 173 | +
  | 
 | 174 | +        Parameters  | 
 | 175 | +        ----------  | 
 | 176 | +        catalog : `lsst.afw.table.Catalog` or `list` of `lsst.afw.table.Record`  | 
 | 177 | +            Catalog in which to flag selected sources.  | 
 | 178 | +        selection : `numpy.ndarray` of type `bool`  | 
 | 179 | +            Selection of sources to mark.  | 
 | 180 | +        """  | 
 | 181 | +        for src, select in zip(sources, selection):  | 
 | 182 | +            if select:  | 
 | 183 | +                src.set(self.key, True)  | 
0 commit comments