From 455a20f87becbbdc350d53f72687565ea969b0b4 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 15:05:13 -0500 Subject: [PATCH 01/20] Checkout typing updates from PR 161 --- src/hist/typing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hist/typing.py b/src/hist/typing.py index 177b668e..d3c3aa6d 100644 --- a/src/hist/typing.py +++ b/src/hist/typing.py @@ -2,9 +2,9 @@ from typing import TYPE_CHECKING, Any, Tuple if sys.version_info < (3, 8): - from typing_extensions import Protocol, SupportsIndex + from typing_extensions import Literal, Protocol, SupportsIndex else: - from typing import Protocol, SupportsIndex + from typing import Literal, Protocol, SupportsIndex if TYPE_CHECKING: from numpy import ufunc as Ufunc @@ -14,7 +14,7 @@ Ufunc = Any -__all__ = ("Protocol", "SupportsIndex", "Ufunc", "ArrayLike") +__all__ = ("Literal", "Protocol", "SupportsIndex", "Ufunc", "ArrayLike") def __dir__() -> Tuple[str, ...]: From 79299eabc6d2fe8918f2c7cedcb2ad7bd5c3f8d4 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 15:08:39 -0500 Subject: [PATCH 02/20] Pull intervals module from PR 161 --- src/hist/intervals.py | 111 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 src/hist/intervals.py diff --git a/src/hist/intervals.py b/src/hist/intervals.py new file mode 100644 index 00000000..40e1abb9 --- /dev/null +++ b/src/hist/intervals.py @@ -0,0 +1,111 @@ +from typing import Any, Optional + +import numpy as np +from scipy import stats + +from .typing import Literal + +__all__ = ("poisson_interval", "clopper_pearson_interval", "ratio_uncertainty") + + +# Code originally contributed to coffea by Nicholas Smith +# Copyright (c) 2018, Fermilab +def poisson_interval( + values: np.ndarray, variances: np.ndarray, coverage: "Optional[float]" = None +) -> np.ndarray: + r""" + The Frequentist coverage interval for Poisson-distributed observations. + + What is calculated is the "Garwood" interval, + c.f. https://www.ine.pt/revstat/pdf/rs120203.pdf or + http://ms.mcmaster.ca/peter/s743/poissonalpha.html. + For weighted data, this approximates the observed count by + ``values**2/variances``, which effectively scales the unweighted + Poisson interval by the average weight. + This may not be the optimal solution: see https://arxiv.org/abs/1309.1287 + for a proper treatment. + + When a bin is zero, the scale of the nearest nonzero bin is substituted to + scale the nominal upper bound. + If all bins zero, a warning is generated and interval is set to ``values``. + + Args: + values: Sum of weights. + variances: Sum of weights squared. + coverage: Central coverage interval. + Default is one standard deviation, which is roughly ``0.68``. + + Returns: + The Poisson central coverage interval. + """ + if coverage is None: + coverage = stats.norm.cdf(1) - stats.norm.cdf(-1) + scale = np.empty_like(values) + scale[values != 0] = variances[values != 0] / values[values != 0] + if np.sum(values == 0) > 0: + missing = np.where(values == 0) + available = np.nonzero(values) + if len(available[0]) == 0: + raise RuntimeError( + "All values are zero! Cannot compute meaningful error bars", + ) + nearest = np.sum( + [np.square(np.subtract.outer(d, d0)) for d, d0 in zip(available, missing)] + ).argmin(axis=0) + argnearest = tuple(dim[nearest] for dim in available) + scale[missing] = scale[argnearest] + counts = values / scale + interval_min = scale * stats.chi2.ppf((1 - coverage) / 2, 2 * counts) / 2.0 + interval_max = scale * stats.chi2.ppf((1 + coverage) / 2, 2 * (counts + 1)) / 2.0 + interval = np.stack((interval_min, interval_max)) + interval[interval == np.nan] = 0.0 # chi2.ppf produces nan for counts=0 + return interval + + +# Code originally contributed to coffea by Nicholas Smith +# Copyright (c) 2018, Fermilab +def clopper_pearson_interval( + num: np.ndarray, denom: np.ndarray, coverage: "Optional[float]" = None +) -> np.ndarray: + r""" + Compute the Clopper-Pearson coverage interval for a binomial distribution. + c.f. http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval + + Args: + num: Numerator or number of successes. + denom: Denominator or number of trials. + coverage: Central coverage interval. + Default is one standard deviation, which is roughly ``0.68``. + + Returns: + The Clopper-Pearson central coverage interval. + """ + if coverage is None: + coverage = stats.norm.cdf(1) - stats.norm.cdf(-1) + # Numerator is subset of denominator + if np.any(num > denom): + raise ValueError( + "Found numerator larger than denominator while calculating binomial uncertainty" + ) + interval_min = stats.beta.ppf((1 - coverage) / 2, num, denom - num + 1) + interval_max = stats.beta.ppf((1 + coverage) / 2, num + 1, denom - num) + interval = np.stack((interval_min, interval_max)) + interval[:, num == 0.0] = 0.0 + interval[1, num == denom] = 1.0 + return interval + + +def ratio_uncertainty( + num: np.ndarray, + denom: np.ndarray, + uncert_type: Literal["poisson", "poisson-ratio"] = "poisson", +) -> Any: + # Note: As return is a numpy ufuncs the type is "Any" + with np.errstate(divide="ignore"): + ratio = num / denom + if uncert_type == "poisson": + ratio_uncert = np.abs(poisson_interval(ratio, num / np.square(denom)) - ratio) + elif uncert_type == "poisson-ratio": + # poisson ratio n/m is equivalent to binomial n/(n+m) + ratio_uncert = np.abs(clopper_pearson_interval(num, num + denom) - ratio) + return ratio_uncert From fe934fff3e622638d4a9dff76708b4509a13f277 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 15:11:08 -0500 Subject: [PATCH 03/20] Update changelot and docs --- docs/changelog.rst | 3 +++ docs/reference/hist.rst | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index 920fdb97..c42f2204 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,9 @@ Changelog IN PROGRESS -------------------- +* Add frequentist coverage interval support in the ``intervals`` module. + `#176 `_ + * Allow ``plot_pull`` to take a more generic callable or a string as a fitting function. Introduce an option to perform a likelihood fit. Write fit parameters' values and uncertainties in the legend. diff --git a/docs/reference/hist.rst b/docs/reference/hist.rst index 1892b6fc..4ae78a58 100644 --- a/docs/reference/hist.rst +++ b/docs/reference/hist.rst @@ -52,6 +52,14 @@ hist.hist module :undoc-members: :show-inheritance: +hist.intervals module +--------------------- + +.. automodule:: hist.intervals + :members: + :undoc-members: + :show-inheritance: + hist.namedhist module --------------------- From b12fb808a6c800d60f0bab0d1cfff0bbf335ee71 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 15:14:21 -0500 Subject: [PATCH 04/20] Add intervals to module lists --- src/hist/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hist/__init__.py b/src/hist/__init__.py index 8b9a96a5..015d59ba 100644 --- a/src/hist/__init__.py +++ b/src/hist/__init__.py @@ -7,13 +7,12 @@ from types import ModuleType from typing import Tuple -from . import accumulators, axis, numpy, storage, tag +from . import accumulators, axis, intervals, numpy, storage, tag from .basehist import BaseHist from .hist import Hist from .namedhist import NamedHist from .tag import loc, overflow, rebin, sum, underflow -# Convenient access to the version number # Convenient access to the version number from .version import version as __version__ @@ -24,6 +23,7 @@ "NamedHist", "accumulators", "axis", + "intervals", "loc", "numpy", "overflow", From 3c3fdca6234ebbb92e5e6ff5d6a3fad7e66d98cb Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 15:15:07 -0500 Subject: [PATCH 05/20] Add tests for intervals --- tests/test_intervals.py | 259 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 tests/test_intervals.py diff --git a/tests/test_intervals.py b/tests/test_intervals.py new file mode 100644 index 00000000..c6dd5493 --- /dev/null +++ b/tests/test_intervals.py @@ -0,0 +1,259 @@ +import numpy as np +import pytest +from pytest import approx + +from hist import Hist, axis, intervals + + +@pytest.fixture(scope="session") +def hist_fixture(): + np.random.seed(42) + + hist_1 = Hist( + axis.Regular( + 30, -3, 3, name="X", label="x [units]", underflow=False, overflow=False + ) + ).fill(np.random.normal(size=1000)) + hist_2 = Hist( + axis.Regular( + 30, -3, 3, name="X", label="x [units]", underflow=False, overflow=False + ) + ).fill(np.random.normal(size=1700)) + + return hist_1, hist_2 + + +def test_poisson_interval(hist_fixture): + hist_1, hist_2 = hist_fixture + + interval_min, interval_max = intervals.poisson_interval( + hist_1.values(), hist_2.values() + ) + interval_min[np.isnan(interval_min)] = np.inf + + assert approx(interval_min) == np.array( + [ + np.inf, + 1.367295313890434, + 0.11640648835506541, + 0.16813691995933283, + 6.218902013333934, + 5.581885639953393, + 12.073194296966197, + 24.41791172658764, + 26.448630661223767, + 29.586027214407356, + 44.61442023134141, + 52.513856022921146, + 77.22286346848537, + 56.48053933662985, + 59.181466346149975, + 73.46017971168686, + 76.98381465233572, + 51.11422215924317, + 60.59758574929647, + 38.220957585930776, + 35.33980227713647, + 12.694821887253045, + 28.556630883837485, + 9.916925984144548, + 11.923130919510358, + 6.553001854991516, + 0.8338910101291227, + 3.279073015531129, + 0.3455075580468999, + np.inf, + ] + ) + assert approx(interval_max) == np.array( + [ + 1.8410216450092634, + 5.918185832883396, + 9.436904357727963, + 8.497457342237139, + 12.809793765013492, + 16.997260128680114, + 23.61395642678031, + 39.15367169418417, + 43.44876191270042, + 48.52076138259468, + 65.17807066898288, + 75.39575495375637, + 97.94286113578973, + 81.65821409089807, + 84.97094651670344, + 98.2239031484522, + 100.48976217732788, + 77.37159837327934, + 83.05078530924457, + 60.007720159359465, + 54.559021073997485, + 30.628601280110384, + 42.75475530038196, + 24.995626256846915, + 21.276544584200877, + 17.752810841884095, + 7.956394295315844, + 10.367005923085337, + 6.59905311823171, + 1.8410216450092634, + ] + ) + + +def test_clopper_pearson_interval(hist_fixture): + hist_1, _ = hist_fixture + + interval_min, interval_max = intervals.clopper_pearson_interval( + hist_1.values() * 0.8, hist_1.values() + ) + + assert approx(interval_min) == np.array( + [ + 0.0, + 0.356101599132999, + 0.24823747856790412, + 0.24823747856790412, + 0.5796381721364965, + 0.5945462492365069, + 0.6553316088046119, + 0.7017549604674665, + 0.7073134299532806, + 0.7135499379572423, + 0.7303918280217871, + 0.7366025731666422, + 0.747712907749326, + 0.7394499309400637, + 0.7409965720106119, + 0.7469889633432774, + 0.7480646510754927, + 0.7366025731666422, + 0.7409965720106119, + 0.725190715883876, + 0.7210714404984849, + 0.669881130877133, + 0.7089853912674083, + 0.6494644930140533, + 0.6494644930140533, + 0.6071564532535227, + 0.356101599132999, + 0.5119169044053422, + 0.24823747856790412, + 0.0, + ] + ) + + assert approx(interval_max) == np.array( + [ + 1.0, + 0.9877641297108364, + 0.9967763463205565, + 0.9967763463205565, + 0.9338657953092819, + 0.9280461985173104, + 0.9006525539880732, + 0.8750821652665013, + 0.8716966334707261, + 0.8678052739470051, + 0.8567701018486796, + 0.8524911283824559, + 0.8445254405242179, + 0.8504887845995179, + 0.8493900886167576, + 0.8450572872847111, + 0.8442663627032932, + 0.8524911283824559, + 0.8493900886167576, + 0.8602638210720486, + 0.8629753273954497, + 0.8931224604654777, + 0.8706631844333439, + 0.9035746974457707, + 0.9035746974457707, + 0.922861360677511, + 0.9877641297108364, + 0.9564011091136548, + 0.9967763463205565, + 1.0, + ] + ) + + +def test_ratio_uncert_poisson(hist_fixture): + hist_1, hist_2 = hist_fixture + + with np.errstate(divide="ignore", invalid="ignore"): + uncert_min, uncert_max = intervals.ratio_uncertainty( + hist_1.values(), hist_2.values(), uncert_type="poisson" + ) + uncert_min[np.isnan(uncert_min)] = np.inf + + assert approx(uncert_min) == np.array( + [ + np.inf, + 0.5442348953698554, + 0.1845449371687184, + 0.2153024266968381, + 0.36793262965634477, + 0.14803306854103054, + 0.16328735597366828, + 0.12585375740467664, + 0.10003669671301474, + 0.08523837652614685, + 0.0823105658636859, + 0.07131648130726226, + 0.09697312390629986, + 0.061387187171010604, + 0.05961885328676131, + 0.06866697092973495, + 0.07674569867624004, + 0.05535754842731544, + 0.0771216359030582, + 0.07117504367911598, + 0.08694657891720786, + 0.07918657111096644, + 0.1401827326329852, + 0.10148207767702055, + 0.23281182525904753, + 0.15550378099768142, + 0.2721174476849277, + 0.2974914224119715, + 0.3229536400452572, + np.inf, + ] + ) + + assert approx(uncert_max) == np.array( + [ + 0.6136738816697545, + 0.972728610961132, + 0.376837089065035, + 0.4396432705758742, + 0.5137755172084555, + 0.2031880838575899, + 0.20814877559535383, + 0.15062730088147014, + 0.11876035397215612, + 0.1002567163955489, + 0.09431292052386309, + 0.08089436452856247, + 0.10794941653181667, + 0.06930343127573091, + 0.06713287739738094, + 0.07653527870763621, + 0.08538026910447305, + 0.06279212910958337, + 0.08684161204615337, + 0.08223020537095749, + 0.10109881672756582, + 0.09904498624127606, + 0.16600975376913252, + 0.13033501446195506, + 0.2990038567068378, + 0.21031050110314442, + 0.486364305480566, + 0.44795519324807165, + 0.6594649058638113, + 0.6136738816697545, + ] + ) From 19fa9a4c6296688a26f4c886ea937b6b55722fd6 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 15:20:01 -0500 Subject: [PATCH 06/20] Revise given Lindesy's suggestions --- src/hist/intervals.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hist/intervals.py b/src/hist/intervals.py index 40e1abb9..a5a7299d 100644 --- a/src/hist/intervals.py +++ b/src/hist/intervals.py @@ -8,8 +8,6 @@ __all__ = ("poisson_interval", "clopper_pearson_interval", "ratio_uncertainty") -# Code originally contributed to coffea by Nicholas Smith -# Copyright (c) 2018, Fermilab def poisson_interval( values: np.ndarray, variances: np.ndarray, coverage: "Optional[float]" = None ) -> np.ndarray: @@ -38,6 +36,8 @@ def poisson_interval( Returns: The Poisson central coverage interval. """ + # Parts originally contributed to coffea + # https://github.com/CoffeaTeam/coffea/blob/8c58807e199a7694bf15e3803dbaf706d34bbfa0/LICENSE if coverage is None: coverage = stats.norm.cdf(1) - stats.norm.cdf(-1) scale = np.empty_like(values) @@ -62,8 +62,6 @@ def poisson_interval( return interval -# Code originally contributed to coffea by Nicholas Smith -# Copyright (c) 2018, Fermilab def clopper_pearson_interval( num: np.ndarray, denom: np.ndarray, coverage: "Optional[float]" = None ) -> np.ndarray: @@ -80,6 +78,8 @@ def clopper_pearson_interval( Returns: The Clopper-Pearson central coverage interval. """ + # Parts originally contributed to coffea + # https://github.com/CoffeaTeam/coffea/blob/8c58807e199a7694bf15e3803dbaf706d34bbfa0/LICENSE if coverage is None: coverage = stats.norm.cdf(1) - stats.norm.cdf(-1) # Numerator is subset of denominator From fb69e4eb92ea2b11dfaddcbd947ff665d63c14e8 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 15:21:03 -0500 Subject: [PATCH 07/20] Apply Henry's advice from code review in PR 161 --- src/hist/intervals.py | 6 +++--- tests/test_intervals.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hist/intervals.py b/src/hist/intervals.py index a5a7299d..e70298dd 100644 --- a/src/hist/intervals.py +++ b/src/hist/intervals.py @@ -98,14 +98,14 @@ def clopper_pearson_interval( def ratio_uncertainty( num: np.ndarray, denom: np.ndarray, - uncert_type: Literal["poisson", "poisson-ratio"] = "poisson", + uncertainty_type: Literal["poisson", "poisson-ratio"] = "poisson", ) -> Any: # Note: As return is a numpy ufuncs the type is "Any" with np.errstate(divide="ignore"): ratio = num / denom - if uncert_type == "poisson": + if uncertainty_type == "poisson": ratio_uncert = np.abs(poisson_interval(ratio, num / np.square(denom)) - ratio) - elif uncert_type == "poisson-ratio": + elif uncertainty_type == "poisson-ratio": # poisson ratio n/m is equivalent to binomial n/(n+m) ratio_uncert = np.abs(clopper_pearson_interval(num, num + denom) - ratio) return ratio_uncert diff --git a/tests/test_intervals.py b/tests/test_intervals.py index c6dd5493..20774387 100644 --- a/tests/test_intervals.py +++ b/tests/test_intervals.py @@ -184,7 +184,7 @@ def test_ratio_uncert_poisson(hist_fixture): with np.errstate(divide="ignore", invalid="ignore"): uncert_min, uncert_max = intervals.ratio_uncertainty( - hist_1.values(), hist_2.values(), uncert_type="poisson" + hist_1.values(), hist_2.values(), uncertainty_type="poisson" ) uncert_min[np.isnan(uncert_min)] = np.inf From 9abfccf8b14d0fc4cac1520b481705577e2aa1ab Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 16:31:02 -0500 Subject: [PATCH 08/20] reduce number of bins for simplicity --- tests/test_intervals.py | 242 ++++++++++------------------------------ 1 file changed, 60 insertions(+), 182 deletions(-) diff --git a/tests/test_intervals.py b/tests/test_intervals.py index 20774387..5f319b0d 100644 --- a/tests/test_intervals.py +++ b/tests/test_intervals.py @@ -11,12 +11,12 @@ def hist_fixture(): hist_1 = Hist( axis.Regular( - 30, -3, 3, name="X", label="x [units]", underflow=False, overflow=False + 10, -3, 3, name="X", label="x [units]", underflow=False, overflow=False ) ).fill(np.random.normal(size=1000)) hist_2 = Hist( axis.Regular( - 30, -3, 3, name="X", label="x [units]", underflow=False, overflow=False + 10, -3, 3, name="X", label="x [units]", underflow=False, overflow=False ) ).fill(np.random.normal(size=1700)) @@ -29,74 +29,33 @@ def test_poisson_interval(hist_fixture): interval_min, interval_max = intervals.poisson_interval( hist_1.values(), hist_2.values() ) - interval_min[np.isnan(interval_min)] = np.inf assert approx(interval_min) == np.array( [ - np.inf, - 1.367295313890434, - 0.11640648835506541, - 0.16813691995933283, - 6.218902013333934, - 5.581885639953393, - 12.073194296966197, - 24.41791172658764, - 26.448630661223767, - 29.586027214407356, - 44.61442023134141, - 52.513856022921146, - 77.22286346848537, - 56.48053933662985, - 59.181466346149975, - 73.46017971168686, - 76.98381465233572, - 51.11422215924317, - 60.59758574929647, - 38.220957585930776, - 35.33980227713647, - 12.694821887253045, - 28.556630883837485, - 9.916925984144548, - 11.923130919510358, - 6.553001854991516, - 0.8338910101291227, - 3.279073015531129, - 0.3455075580468999, - np.inf, + 1.5300291697782136, + 15.163319280895097, + 70.76628228209115, + 138.53885398885032, + 206.76205061547802, + 216.04895966121967, + 146.23699499970348, + 59.34874385941129, + 23.42140089960769, + 4.398298999080774, ] ) assert approx(interval_max) == np.array( [ - 1.8410216450092634, - 5.918185832883396, - 9.436904357727963, - 8.497457342237139, - 12.809793765013492, - 16.997260128680114, - 23.61395642678031, - 39.15367169418417, - 43.44876191270042, - 48.52076138259468, - 65.17807066898288, - 75.39575495375637, - 97.94286113578973, - 81.65821409089807, - 84.97094651670344, - 98.2239031484522, - 100.48976217732788, - 77.37159837327934, - 83.05078530924457, - 60.007720159359465, - 54.559021073997485, - 30.628601280110384, - 42.75475530038196, - 24.995626256846915, - 21.276544584200877, - 17.752810841884095, - 7.956394295315844, - 10.367005923085337, - 6.59905311823171, - 1.8410216450092634, + 12.599700199673102, + 28.738493673101413, + 94.88918823365604, + 173.30954997605485, + 246.94963052163627, + 257.713403993323, + 181.58237748187338, + 84.74029590412256, + 38.20780361508876, + 13.75903516119368, ] ) @@ -110,71 +69,31 @@ def test_clopper_pearson_interval(hist_fixture): assert approx(interval_min) == np.array( [ - 0.0, - 0.356101599132999, - 0.24823747856790412, - 0.24823747856790412, - 0.5796381721364965, - 0.5945462492365069, - 0.6553316088046119, - 0.7017549604674665, - 0.7073134299532806, - 0.7135499379572423, - 0.7303918280217871, - 0.7366025731666422, - 0.747712907749326, - 0.7394499309400637, + 0.4757493739959414, + 0.6739341864914268, + 0.745848569184471, + 0.7626112365367469, + 0.769793269861182, + 0.7705165046817444, + 0.7636693080808218, 0.7409965720106119, - 0.7469889633432774, - 0.7480646510754927, - 0.7366025731666422, - 0.7409965720106119, - 0.725190715883876, - 0.7210714404984849, - 0.669881130877133, - 0.7089853912674083, - 0.6494644930140533, - 0.6494644930140533, - 0.6071564532535227, - 0.356101599132999, - 0.5119169044053422, - 0.24823747856790412, - 0.0, + 0.6996942106437167, + 0.5617068220945686, ] ) assert approx(interval_max) == np.array( [ - 1.0, - 0.9877641297108364, - 0.9967763463205565, - 0.9967763463205565, - 0.9338657953092819, - 0.9280461985173104, - 0.9006525539880732, - 0.8750821652665013, - 0.8716966334707261, - 0.8678052739470051, - 0.8567701018486796, - 0.8524911283824559, - 0.8445254405242179, - 0.8504887845995179, - 0.8493900886167576, - 0.8450572872847111, - 0.8442663627032932, - 0.8524911283824559, + 0.9660393063397832, + 0.8909499770371633, + 0.8458913477820733, + 0.8331466664953922, + 0.8273390876155183, + 0.8267415410793807, + 0.832305086850061, 0.8493900886167576, - 0.8602638210720486, - 0.8629753273954497, - 0.8931224604654777, - 0.8706631844333439, - 0.9035746974457707, - 0.9035746974457707, - 0.922861360677511, - 0.9877641297108364, - 0.9564011091136548, - 0.9967763463205565, - 1.0, + 0.8763181169809199, + 0.9404385982116935, ] ) @@ -186,74 +105,33 @@ def test_ratio_uncert_poisson(hist_fixture): uncert_min, uncert_max = intervals.ratio_uncertainty( hist_1.values(), hist_2.values(), uncertainty_type="poisson" ) - uncert_min[np.isnan(uncert_min)] = np.inf assert approx(uncert_min) == np.array( [ - np.inf, - 0.5442348953698554, - 0.1845449371687184, - 0.2153024266968381, - 0.36793262965634477, - 0.14803306854103054, - 0.16328735597366828, - 0.12585375740467664, - 0.10003669671301474, - 0.08523837652614685, - 0.0823105658636859, - 0.07131648130726226, - 0.09697312390629986, - 0.061387187171010604, - 0.05961885328676131, - 0.06866697092973495, - 0.07674569867624004, - 0.05535754842731544, - 0.0771216359030582, - 0.07117504367911598, - 0.08694657891720786, - 0.07918657111096644, - 0.1401827326329852, - 0.10148207767702055, - 0.23281182525904753, - 0.15550378099768142, - 0.2721174476849277, - 0.2974914224119715, - 0.3229536400452572, - np.inf, + 0.1439794096271186, + 0.12988019998066708, + 0.0711565635066328, + 0.045722288708959336, + 0.04049103990124614, + 0.038474711321686006, + 0.045227104349518155, + 0.06135954973309016, + 0.12378460125991042, + 0.19774186117590858, ] ) assert approx(uncert_max) == np.array( [ - 0.6136738816697545, - 0.972728610961132, - 0.376837089065035, - 0.4396432705758742, - 0.5137755172084555, - 0.2031880838575899, - 0.20814877559535383, - 0.15062730088147014, - 0.11876035397215612, - 0.1002567163955489, - 0.09431292052386309, - 0.08089436452856247, - 0.10794941653181667, - 0.06930343127573091, - 0.06713287739738094, - 0.07653527870763621, - 0.08538026910447305, - 0.06279212910958337, - 0.08684161204615337, - 0.08223020537095749, - 0.10109881672756582, - 0.09904498624127606, - 0.16600975376913252, - 0.13033501446195506, - 0.2990038567068378, - 0.21031050110314442, - 0.486364305480566, - 0.44795519324807165, - 0.6594649058638113, - 0.6136738816697545, + 0.22549817680979262, + 0.1615766277480729, + 0.07946632561746425, + 0.04954668134626106, + 0.04327624938437291, + 0.04106267733757407, + 0.04891233040201837, + 0.06909296140898324, + 0.1485919630151803, + 0.2817958228477908, ] ) From cde55f1b0939d7f01917b0a4185572d50b082a73 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 16:31:55 -0500 Subject: [PATCH 09/20] uncert -> uncertainity --- tests/test_intervals.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_intervals.py b/tests/test_intervals.py index 5f319b0d..c2087bd7 100644 --- a/tests/test_intervals.py +++ b/tests/test_intervals.py @@ -102,11 +102,11 @@ def test_ratio_uncert_poisson(hist_fixture): hist_1, hist_2 = hist_fixture with np.errstate(divide="ignore", invalid="ignore"): - uncert_min, uncert_max = intervals.ratio_uncertainty( + uncertainty_min, uncertainty_max = intervals.ratio_uncertainty( hist_1.values(), hist_2.values(), uncertainty_type="poisson" ) - assert approx(uncert_min) == np.array( + assert approx(uncertainty_min) == np.array( [ 0.1439794096271186, 0.12988019998066708, @@ -121,7 +121,7 @@ def test_ratio_uncert_poisson(hist_fixture): ] ) - assert approx(uncert_max) == np.array( + assert approx(uncertainty_max) == np.array( [ 0.22549817680979262, 0.1615766277480729, From b806357d2081c86f9dff92a60fb1a7a2019bc687 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 16:40:47 -0500 Subject: [PATCH 10/20] Add docstring for ratio_uncertainty --- src/hist/intervals.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/hist/intervals.py b/src/hist/intervals.py index e70298dd..9a80791e 100644 --- a/src/hist/intervals.py +++ b/src/hist/intervals.py @@ -100,6 +100,20 @@ def ratio_uncertainty( denom: np.ndarray, uncertainty_type: Literal["poisson", "poisson-ratio"] = "poisson", ) -> Any: + r""" + Calculate the uncertainties for the values of the ratio ``num/denom`` using + the specified coverage interval approach. + + Args: + num: Numerator or number of successes. + denom: Denominator or number of trials. + uncertainty_type: Coverage interval type to use in the calculation of + the uncertainties. + Default is ``"poisson"``. + + Returns: + The uncertainties for the ratio. + """ # Note: As return is a numpy ufuncs the type is "Any" with np.errstate(divide="ignore"): ratio = num / denom From 4a0e13d59c178c658e5b1d22958e0a58061ad42e Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 23:33:23 -0500 Subject: [PATCH 11/20] Simplify again given the very course binning --- tests/test_intervals.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_intervals.py b/tests/test_intervals.py index c2087bd7..19e32cf9 100644 --- a/tests/test_intervals.py +++ b/tests/test_intervals.py @@ -101,10 +101,9 @@ def test_clopper_pearson_interval(hist_fixture): def test_ratio_uncert_poisson(hist_fixture): hist_1, hist_2 = hist_fixture - with np.errstate(divide="ignore", invalid="ignore"): - uncertainty_min, uncertainty_max = intervals.ratio_uncertainty( - hist_1.values(), hist_2.values(), uncertainty_type="poisson" - ) + uncertainty_min, uncertainty_max = intervals.ratio_uncertainty( + hist_1.values(), hist_2.values(), uncertainty_type="poisson" + ) assert approx(uncertainty_min) == np.array( [ From b4295ca14b05ed9e359577c45a4f9d2ae7961074 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 23:39:24 -0500 Subject: [PATCH 12/20] Raise TypeError if invalid uncertainity_type passed --- src/hist/intervals.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/hist/intervals.py b/src/hist/intervals.py index 9a80791e..71941bbc 100644 --- a/src/hist/intervals.py +++ b/src/hist/intervals.py @@ -122,4 +122,8 @@ def ratio_uncertainty( elif uncertainty_type == "poisson-ratio": # poisson ratio n/m is equivalent to binomial n/(n+m) ratio_uncert = np.abs(clopper_pearson_interval(num, num + denom) - ratio) + else: + raise TypeError( + f"'{uncertainty_type}' is an invalid option for uncertainty_type." + ) return ratio_uncert From f594fcb67312fe397e26ee062f41e63d48958d60 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 23:39:39 -0500 Subject: [PATCH 13/20] Test for TypeError --- tests/test_intervals.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_intervals.py b/tests/test_intervals.py index 19e32cf9..76ba3a00 100644 --- a/tests/test_intervals.py +++ b/tests/test_intervals.py @@ -98,7 +98,7 @@ def test_clopper_pearson_interval(hist_fixture): ) -def test_ratio_uncert_poisson(hist_fixture): +def test_ratio_uncert(hist_fixture): hist_1, hist_2 = hist_fixture uncertainty_min, uncertainty_max = intervals.ratio_uncertainty( @@ -134,3 +134,8 @@ def test_ratio_uncert_poisson(hist_fixture): 0.2817958228477908, ] ) + + with pytest.raises(TypeError): + intervals.ratio_uncertainty( + hist_1.values(), hist_2.values(), uncertainty_type="fail" + ) From 97a0daf25a89e7a48fefe73ff28b235d7b82958f Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Wed, 7 Apr 2021 23:41:56 -0500 Subject: [PATCH 14/20] full name again --- tests/test_intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_intervals.py b/tests/test_intervals.py index 76ba3a00..b09a6752 100644 --- a/tests/test_intervals.py +++ b/tests/test_intervals.py @@ -98,7 +98,7 @@ def test_clopper_pearson_interval(hist_fixture): ) -def test_ratio_uncert(hist_fixture): +def test_ratio_uncertainty(hist_fixture): hist_1, hist_2 = hist_fixture uncertainty_min, uncertainty_max = intervals.ratio_uncertainty( From 5a864139b5f975d29e1c2aa3fd5af534c25dd060 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Thu, 8 Apr 2021 14:20:57 -0500 Subject: [PATCH 15/20] Remove intervals from default imports as contains non-core dependencies --- src/hist/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hist/__init__.py b/src/hist/__init__.py index 015d59ba..21683fd5 100644 --- a/src/hist/__init__.py +++ b/src/hist/__init__.py @@ -7,7 +7,7 @@ from types import ModuleType from typing import Tuple -from . import accumulators, axis, intervals, numpy, storage, tag +from . import accumulators, axis, numpy, storage, tag from .basehist import BaseHist from .hist import Hist from .namedhist import NamedHist @@ -23,7 +23,6 @@ "NamedHist", "accumulators", "axis", - "intervals", "loc", "numpy", "overflow", From 1a1640c113ea1a3b35dc413760e5dafc5e430582 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Thu, 8 Apr 2021 14:26:27 -0500 Subject: [PATCH 16/20] Add ImportError check for scipy import --- src/hist/intervals.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/hist/intervals.py b/src/hist/intervals.py index 71941bbc..091b8e97 100644 --- a/src/hist/intervals.py +++ b/src/hist/intervals.py @@ -1,10 +1,20 @@ from typing import Any, Optional import numpy as np -from scipy import stats from .typing import Literal +try: + from scipy import stats +except ImportError: + from sys import stderr + + print( + "hist.intervals requires scipy. Please install hist[plot] or manually install scipy.", + file=stderr, + ) + raise + __all__ = ("poisson_interval", "clopper_pearson_interval", "ratio_uncertainty") From ff74f2578494e7baf2d539948d2d74e75139e8f7 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Thu, 8 Apr 2021 14:34:30 -0500 Subject: [PATCH 17/20] Correct poisson_interval docstring --- src/hist/intervals.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hist/intervals.py b/src/hist/intervals.py index 091b8e97..4f481331 100644 --- a/src/hist/intervals.py +++ b/src/hist/intervals.py @@ -35,7 +35,6 @@ def poisson_interval( When a bin is zero, the scale of the nearest nonzero bin is substituted to scale the nominal upper bound. - If all bins zero, a warning is generated and interval is set to ``values``. Args: values: Sum of weights. @@ -57,7 +56,7 @@ def poisson_interval( available = np.nonzero(values) if len(available[0]) == 0: raise RuntimeError( - "All values are zero! Cannot compute meaningful error bars", + "All values are zero! Cannot compute meaningful uncertainties.", ) nearest = np.sum( [np.square(np.subtract.outer(d, d0)) for d, d0 in zip(available, missing)] From 821994a19dac671c2aaaeddedb08095d6b2330c6 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Thu, 8 Apr 2021 14:42:47 -0500 Subject: [PATCH 18/20] Change ImportError -> ModuleNotFoundError --- src/hist/intervals.py | 2 +- src/hist/plot.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hist/intervals.py b/src/hist/intervals.py index 4f481331..d10c78ef 100644 --- a/src/hist/intervals.py +++ b/src/hist/intervals.py @@ -6,7 +6,7 @@ try: from scipy import stats -except ImportError: +except ModuleNotFoundError: from sys import stderr print( diff --git a/src/hist/plot.py b/src/hist/plot.py index 8b7855eb..2cec5e03 100644 --- a/src/hist/plot.py +++ b/src/hist/plot.py @@ -232,7 +232,7 @@ def plot_pull( try: from iminuit import Minuit # noqa: F401 from scipy.optimize import curve_fit # noqa: F401 - except ImportError: + except ModuleNotFoundError: print( "Hist.plot_pull requires scipy and iminuit. Please install hist[plot] or manually install dependencies.", file=sys.stderr, From aa09c8309ad7e47dc303db8c55bc3c3704e4fb04 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Thu, 8 Apr 2021 16:10:12 -0500 Subject: [PATCH 19/20] Add expanded docstring on uncertainty_type options --- src/hist/intervals.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/hist/intervals.py b/src/hist/intervals.py index d10c78ef..861c4866 100644 --- a/src/hist/intervals.py +++ b/src/hist/intervals.py @@ -117,8 +117,11 @@ def ratio_uncertainty( num: Numerator or number of successes. denom: Denominator or number of trials. uncertainty_type: Coverage interval type to use in the calculation of - the uncertainties. - Default is ``"poisson"``. + the uncertainties. + ``"poisson"`` (default) implements the Poisson interval for the + numerator scaled by the denominator. + ``"poisson-ratio"`` implements the Clopper-Pearson interval for Poisson + distributed ``num`` and ``denom``. Returns: The uncertainties for the ratio. From bf2388cc59ee3729408e9289d7d8586f1e58b6a3 Mon Sep 17 00:00:00 2001 From: Matthew Feickert Date: Thu, 8 Apr 2021 18:23:00 -0500 Subject: [PATCH 20/20] Add the __dir__ trick --- src/hist/intervals.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hist/intervals.py b/src/hist/intervals.py index 861c4866..678e6b79 100644 --- a/src/hist/intervals.py +++ b/src/hist/intervals.py @@ -1,4 +1,4 @@ -from typing import Any, Optional +from typing import Any, Optional, Tuple import numpy as np @@ -18,6 +18,10 @@ __all__ = ("poisson_interval", "clopper_pearson_interval", "ratio_uncertainty") +def __dir__() -> Tuple[str, ...]: + return __all__ + + def poisson_interval( values: np.ndarray, variances: np.ndarray, coverage: "Optional[float]" = None ) -> np.ndarray: