Skip to content

Commit 23fdbe9

Browse files
committed
Merge branch 'tickets/DM-12207'
2 parents 22030a4 + 97cef17 commit 23fdbe9

File tree

5 files changed

+893
-1
lines changed

5 files changed

+893
-1
lines changed

python/lsst/meas/algorithms/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
from .ingestIndexReferenceTask import *
6363
from .loadIndexedReferenceObjects import *
6464
from .indexerRegistry import *
65+
from .reserveSourcesTask import *
6566

6667
from .version import *
6768

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
#
2+
# LSST Data Management System
3+
#
4+
# Copyright 2008-2017 AURA/LSST.
5+
#
6+
# This product includes software developed by the
7+
# LSST Project (http://www.lsst.org/).
8+
#
9+
# This program is free software: you can redistribute it and/or modify
10+
# it under the terms of the GNU General Public License as published by
11+
# the Free Software Foundation, either version 3 of the License, or
12+
# (at your option) any later version.
13+
#
14+
# This program is distributed in the hope that it will be useful,
15+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
16+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17+
# GNU General Public License for more details.
18+
#
19+
# You should have received a copy of the LSST License Statement and
20+
# the GNU General Public License along with this program. If not,
21+
# see <https://www.lsstcorp.org/LegalNotices/>.
22+
#
23+
from __future__ import absolute_import, division, print_function
24+
25+
from builtins import zip
26+
27+
import numpy as np
28+
29+
from lsst.pex.config import Config, Field
30+
from lsst.pipe.base import Task, Struct
31+
32+
import lsst.afw.table
33+
34+
__all__ = ["ReserveSourcesConfig", "ReserveSourcesTask"]
35+
36+
37+
class ReserveSourcesConfig(Config):
    """Options controlling the random reservation of sources

    ``fraction`` sets how many of the candidate sources are held back and
    ``seed`` is the base value used when seeding the random number generator.
    """
    fraction = Field(
        doc="Fraction of candidates to reserve from fitting; none if <= 0",
        dtype=float,
        default=0.0,
    )
    seed = Field(
        doc=("This number will be added to the exposure ID "
             "to set the random seed for reserving candidates"),
        dtype=int,
        default=1,
    )
44+
45+
46+
class ReserveSourcesTask(Task):
    """Reserve sources from analysis

    We randomly select a fraction of sources that will be reserved
    from analysis. This allows evaluation of the quality of model fits
    using sources that were not involved in the fitting process.

    Constructor parameters
    ----------------------
    columnName : `str`, required
        Name of flag column to add; we will suffix this with "_reserved".
    schema : `lsst.afw.table.Schema`, required
        Catalog schema.
    doc : `str`
        Documentation for column to add.
    config : `ReserveSourcesConfig`
        Configuration.
    """
    ConfigClass = ReserveSourcesConfig
    _DefaultName = "reserveSources"

    def __init__(self, columnName=None, schema=None, doc=None, **kwargs):
        Task.__init__(self, **kwargs)
        # Both arguments are mandatory even though they have ``None`` defaults.
        assert columnName is not None, "columnName not provided"
        assert schema is not None, "schema not provided"
        self.columnName = columnName
        # Key of the flag field that `markSources` sets on reserved sources.
        self.key = schema.addField(self.columnName + "_reserved", type="Flag", doc=doc)

    def run(self, sources, prior=None, expId=0):
        """Select sources to be reserved

        Reserved sources will be flagged in the catalog, and we will return
        boolean arrays that identify the sources to be reserved from and
        used in the analysis. Typically you'll want to use the sources
        from the `use` array in your fitting, and use the sources from the
        `reserved` array as an independent test of your fitting.

        Parameters
        ----------
        sources : `lsst.afw.table.Catalog` or `list` of `lsst.afw.table.Record`
            Sources from which to select some to be reserved.
        prior : `numpy.ndarray` of type `bool`, optional
            Prior selection of sources. Should have the same length as
            `sources`. If set, we will only consider for reservation sources
            that are flagged `True` in this array. Any array-like is accepted
            and is converted to a boolean array.
        expId : `int`
            Exposure identifier; used for seeding the random number generator.

        Return struct contents
        ----------------------
        reserved : `numpy.ndarray` of type `bool`
            Sources to be reserved are flagged `True` in this array.
        use : `numpy.ndarray` of type `bool`
            Sources the user should use in analysis are flagged `True`.
        """
        if prior is not None:
            # Coerce so that ``sum``, boolean-mask indexing and ``&`` below
            # behave as a mask even if the caller passed a plain list.
            prior = np.asarray(prior, dtype=bool)
            assert len(prior) == len(sources), "Length mismatch: %s vs %s" % (len(prior), len(sources))
            numSources = int(prior.sum())
        else:
            numSources = len(sources)
        selection = self.select(numSources, expId)
        if prior is not None:
            # Expand the selection over the subset back to the full length.
            selection = self.applySelectionPrior(prior, selection)
        self.markSources(sources, selection)
        self.log.info("Reserved %d/%d sources", selection.sum(), len(selection))
        return Struct(reserved=selection,
                      use=prior & ~selection if prior is not None else np.logical_not(selection))

    def select(self, numSources, expId=0):
        """Randomly select some sources

        We return a boolean array with a random selection. The fraction
        of sources selected is specified by the config parameter `fraction`.

        Parameters
        ----------
        numSources : `int`
            Number of sources in catalog from which to select.
        expId : `int`
            Exposure identifier; used for seeding the random number generator.

        Returns
        -------
        selection : `numpy.ndarray` of type `bool`
            Selected sources are flagged `True` in this array.
        """
        selection = np.zeros(numSources, dtype=bool)
        if self.config.fraction <= 0:
            return selection
        reserve = int(np.round(numSources*self.config.fraction))
        selection[:reserve] = True
        # RandomState only accepts seeds in [0, 2**32 - 1]; exposure IDs can
        # exceed that, so keep just the low 32 bits. Seeds already in range
        # are unchanged, so previous selections remain reproducible.
        rng = np.random.RandomState((self.config.seed + expId) & 0xFFFFFFFF)
        rng.shuffle(selection)
        return selection

    def applySelectionPrior(self, prior, selection):
        """Apply selection to full catalog

        The `select` method makes a random selection of sources. If those
        sources don't represent the full population (because a sub-selection
        has already been made), then we need to generate a selection covering
        the entire population.

        Parameters
        ----------
        prior : `numpy.ndarray` of type `bool`
            Prior selection of sources, identifying the subset from which the
            random selection has been made.
        selection : `numpy.ndarray` of type `bool`
            Selection of sources in subset identified by `prior`.

        Returns
        -------
        full : `numpy.ndarray` of type `bool`
            Selection applied to full population.
        """
        full = np.zeros(len(prior), dtype=bool)
        # Boolean-mask assignment: fills the `True` slots of `prior`, in
        # order, with the entries of `selection`.
        full[prior] = selection
        return full

    def markSources(self, sources, selection):
        """Mark sources in a list or catalog

        This requires iterating through the list and setting the flag in
        each source individually. Even if the `sources` is a `Catalog`
        with contiguous records, it's not currently possible to set a boolean
        column (DM-6981) so we need to iterate.

        Only selected sources are touched: the flag is set to `True` for
        them and is left as-is for all other sources.

        Parameters
        ----------
        sources : `lsst.afw.table.Catalog` or `list` of `lsst.afw.table.Record`
            Catalog in which to flag selected sources.
        selection : `numpy.ndarray` of type `bool`
            Selection of sources to mark.
        """
        for src, select in zip(sources, selection):
            if select:
                src.set(self.key, True)

0 commit comments

Comments
 (0)