From 63da6ff8cb932490c11eb87035ff77a4dfd1fa22 Mon Sep 17 00:00:00 2001 From: Yash1256 Date: Tue, 29 Sep 2020 17:48:19 +0530 Subject: [PATCH 1/5] Formatted Next 15 Files --- pymc3/model.py | 150 +++++------- pymc3/sampling.py | 20 +- pymc3/tests/test_examples.py | 296 ++++++++++++++++------- pymc3/tests/test_math.py | 20 +- pymc3/tests/test_minibatches.py | 97 ++++---- pymc3/tests/test_model_func.py | 21 +- pymc3/tests/test_model_graph.py | 47 ++-- pymc3/tests/test_model_helpers.py | 36 +-- pymc3/tests/test_modelcontext.py | 18 +- pymc3/tests/test_models_linear.py | 96 +++----- pymc3/tests/test_models_utils.py | 51 ++-- pymc3/tests/test_parallel_sampling.py | 74 ++++-- pymc3/tests/test_pickling.py | 6 +- pymc3/tests/test_posdef_sym.py | 12 +- pymc3/tests/test_posterior_predictive.py | 38 +-- 15 files changed, 527 insertions(+), 455 deletions(-) diff --git a/pymc3/model.py b/pymc3/model.py index c01ca227b0..58cbab99d1 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -70,9 +70,9 @@ def _str_repr(self, name=None, dist=None, formatting="plain"): else: return super().__str__() - if name is None and hasattr(self, 'name'): + if name is None and hasattr(self, "name"): name = self.name - if dist is None and hasattr(self, 'distribution'): + if dist is None and hasattr(self, "distribution"): dist = self.distribution return self.distribution._str_repr(name=name, dist=dist, formatting=formatting) @@ -123,8 +123,7 @@ def incorporate_methods(source, destination, methods, wrapper=None, override=Fal for method in methods: if hasattr(destination, method) and not override: raise AttributeError( - f"Cannot add method {method!r}" - + "to destination object as it already exists. " + f"Cannot add method {method!r}" + "to destination object as it already exists. " "To prevent this error set 'override=True'." 
) if hasattr(source, method): @@ -172,12 +171,8 @@ def get_named_nodes_and_relations(graph): else: ancestors = {} descendents = {} - descendents, ancestors = _get_named_nodes_and_relations( - graph, None, ancestors, descendents - ) - leaf_dict = { - node.name: node for node, ancestor in ancestors.items() if len(ancestor) == 0 - } + descendents, ancestors = _get_named_nodes_and_relations(graph, None, ancestors, descendents) + leaf_dict = {node.name: node for node, ancestor in ancestors.items() if len(ancestor) == 0} return leaf_dict, descendents, ancestors @@ -529,9 +524,7 @@ def tree_contains(self, item): def __setitem__(self, key, value): raise NotImplementedError( - "Method is removed as we are not" - " able to determine " - "appropriate logic for it" + "Method is removed as we are not" " able to determine " "appropriate logic for it" ) # Added this because mypy didn't like having __imul__ without __mul__ @@ -620,7 +613,7 @@ def __init__( dtype=None, casting="no", compute_grads=True, - **kwargs + **kwargs, ): from .distributions import TensorType @@ -695,9 +688,7 @@ def __init__( inputs = [self._vars_joined] - self._theano_function = theano.function( - inputs, outputs, givens=givens, **kwargs - ) + self._theano_function = theano.function(inputs, outputs, givens=givens, **kwargs) def set_weights(self, values): if values.shape != (self._n_costs - 1,): @@ -713,10 +704,7 @@ def get_extra_values(self): if not self._extra_are_set: raise ValueError("Extra values are not set.") - return { - var.name: self._extra_vars_shared[var.name].get_value() - for var in self._extra_vars - } + return {var.name: self._extra_vars_shared[var.name].get_value() for var in self._extra_vars} def __call__(self, array, grad_out=None, extra_vars=None): if extra_vars is not None: @@ -727,8 +715,7 @@ def __call__(self, array, grad_out=None, extra_vars=None): if array.shape != (self.size,): raise ValueError( - "Invalid shape for array. Must be %s but is %s." - % ((self.size,), array.shape) + "Invalid shape for array. Must be %s but is %s." % ((self.size,), array.shape) ) if grad_out is None: @@ -758,13 +745,10 @@ def dict_to_array(self, point): def array_to_dict(self, array): """Convert an array to a dictionary containing the grad_vars.""" if array.shape != (self.size,): - raise ValueError( - f"Array should have shape ({self.size},) but has {array.shape}" - ) + raise ValueError(f"Array should have shape ({self.size},) but has {array.shape}") if array.dtype != self.dtype: raise ValueError( - "Array has invalid dtype. Should be %s but is %s" - % (self._dtype, self.dtype) + "Array has invalid dtype. 
Should be %s but is %s" % (self._dtype, self.dtype) ) point = {} for varmap in self._ordering.vmap: @@ -993,12 +977,10 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): if tempered: with self: - free_RVs_logp = tt.sum([ - tt.sum(var.logpt) for var in self.free_RVs + self.potentials - ]) - observed_RVs_logp = tt.sum([ - tt.sum(var.logpt) for var in self.observed_RVs - ]) + free_RVs_logp = tt.sum( + [tt.sum(var.logpt) for var in self.free_RVs + self.potentials] + ) + observed_RVs_logp = tt.sum([tt.sum(var.logpt) for var in self.observed_RVs]) costs = [free_RVs_logp, observed_RVs_logp] else: @@ -1038,7 +1020,7 @@ def logp_nojact(self): @property def varlogpt(self): """Theano scalar of log-probability of the unobserved random variables - (excluding deterministic).""" + (excluding deterministic).""" with self: factors = [var.logpt for var in self.free_RVs] return tt.sum(factors) @@ -1110,9 +1092,7 @@ def add_coords(self, coords): ) if name in self.coords: if not coords[name].equals(self.coords[name]): - raise ValueError( - "Duplicate and incompatiple coordinate: %s." % name - ) + raise ValueError("Duplicate and incompatiple coordinate: %s." % name) else: self.coords[name] = coords[name] @@ -1141,9 +1121,7 @@ def Var(self, name, dist, data=None, total_size=None, dims=None): if data is None: if getattr(dist, "transform", None) is None: with self: - var = FreeRV( - name=name, distribution=dist, total_size=total_size, model=self - ) + var = FreeRV(name=name, distribution=dist, total_size=total_size, model=self) self.free_RVs.append(var) else: with self: @@ -1218,8 +1196,7 @@ def prefix(self): return "%s_" % self.name if self.name else "" def name_for(self, name): - """Checks if name has prefix and adds if needed - """ + """Checks if name has prefix and adds if needed""" if self.prefix: if not name.startswith(self.prefix): return f"{self.prefix}{name}" @@ -1229,8 +1206,7 @@ def name_for(self, name): return name def name_of(self, name): - """Checks if name has prefix and deletes if needed - """ + """Checks if name has prefix and deletes if needed""" if not self.prefix or not name: return name elif name.startswith(self.prefix): @@ -1269,7 +1245,7 @@ def makefn(self, outs, mode=None, *args, **kwargs): accept_inplace=True, mode=mode, *args, - **kwargs + **kwargs, ) def fn(self, outs, mode=None, *args, **kwargs): @@ -1391,10 +1367,7 @@ def check_test_point(self, test_point=None, round_vals=2): test_point = self.test_point return Series( - { - RV.name: np.round(RV.logp(self.test_point), round_vals) - for RV in self.basic_RVs - }, + {RV.name: np.round(RV.logp(self.test_point), round_vals) for RV in self.basic_RVs}, name="Log-probability of test_point", ) @@ -1403,23 +1376,31 @@ def _str_repr(self, formatting="plain", **kwargs): if formatting == "latex": rv_reprs = [rv.__latex__() for rv in all_rv] - rv_reprs = [rv_repr.replace(r"\sim", r"&\sim &").strip("$") - for rv_repr in rv_reprs if rv_repr is not None] + rv_reprs = [ + rv_repr.replace(r"\sim", r"&\sim &").strip("$") + for rv_repr in rv_reprs + if rv_repr is not None + ] return r"""$$ \begin{{array}}{{rcl}} {} \end{{array}} $$""".format( - "\\\\".join(rv_reprs)) + "\\\\".join(rv_reprs) + ) else: rv_reprs = [rv.__str__() for rv in all_rv] - rv_reprs = [rv_repr for rv_repr in rv_reprs if not 'TransformedDistribution()' in rv_repr] + rv_reprs = [ + rv_repr for rv_repr in rv_reprs if not "TransformedDistribution()" in rv_repr + ] # align vars on their ~ - names = [s[:s.index('~')-1] for s in rv_reprs] - distrs = 
[s[s.index('~')+2:] for s in rv_reprs] + names = [s[: s.index("~") - 1] for s in rv_reprs] + distrs = [s[s.index("~") + 2 :] for s in rv_reprs] maxlen = str(max(len(x) for x in names)) - rv_reprs = [('{name:>' + maxlen + '} ~ {distr}').format(name=n, distr=d) - for n, d in zip(names, distrs)] + rv_reprs = [ + ("{name:>" + maxlen + "} ~ {distr}").format(name=n, distr=d) + for n, d in zip(names, distrs) + ] return "\n".join(rv_reprs) def __str__(self, **kwargs): @@ -1537,8 +1518,9 @@ def Point(*args, **kwargs): except Exception as e: raise TypeError(f"can't turn {args} and {kwargs} into a dict. {e}") return { - get_var_name(k): np.array(v) for k, v in d.items() - if get_var_name(k) in map(get_var_name, model.vars) + get_var_name(k): np.array(v) + for k, v in d.items() + if get_var_name(k) in map(get_var_name, model.vars) } @@ -1593,11 +1575,7 @@ def _get_scaling(total_size, shape, ndim): denom = 1 coef = floatX(total_size) / floatX(denom) elif isinstance(total_size, (list, tuple)): - if not all( - isinstance(i, int) - for i in total_size - if (i is not Ellipsis and i is not None) - ): + if not all(isinstance(i, int) for i in total_size if (i is not Ellipsis and i is not None)): raise TypeError( "Unrecognized `total_size` type, expected " "int or list of ints, got %r" % total_size @@ -1625,16 +1603,13 @@ def _get_scaling(total_size, shape, ndim): else: shp_end = np.asarray([]) shp_begin = shape[: len(begin)] - begin_coef = [ - floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None - ] + begin_coef = [floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None] end_coef = [floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None] coefs = begin_coef + end_coef coef = tt.prod(coefs) else: raise TypeError( - "Unrecognized `total_size` type, expected " - "int or list of ints, got %r" % total_size + "Unrecognized `total_size` type, expected " "int or list of ints, got %r" % total_size ) return tt.as_tensor(floatX(coef)) @@ -1753,9 +1728,7 @@ def as_tensor(data, name, model, distribution): testval=testval, parent_dist=distribution, ) - missing_values = FreeRV( - name=name + "_missing", distribution=fakedist, model=model - ) + missing_values = FreeRV(name=name + "_missing", distribution=fakedist, model=model) constant = tt.as_tensor_variable(data.filled()) dataTensor = tt.set_subtensor(constant[data.mask.nonzero()], missing_values) @@ -1854,14 +1827,11 @@ def __init__(self, name, data, distribution, total_size=None, model=None): """ self.name = name self.data = { - name: as_tensor(data, name, model, distribution) - for name, data in data.items() + name: as_tensor(data, name, model, distribution) for name, data in data.items() } self.missing_values = [ - datum.missing_values - for datum in self.data.values() - if datum.missing_values is not None + datum.missing_values for datum in self.data.values() if datum.missing_values is not None ] self.logp_elemwiset = distribution.logp(**self.data) # The logp might need scaling in minibatches. 
@@ -1871,9 +1841,7 @@ def __init__(self, name, data, distribution, total_size=None, model=None): self.total_size = total_size self.model = model self.distribution = distribution - self.scaling = _get_scaling( - total_size, self.logp_elemwiset.shape, self.logp_elemwiset.ndim - ) + self.scaling = _get_scaling(total_size, self.logp_elemwiset.shape, self.logp_elemwiset.ndim) # Make hashable by id for draw_values def __hash__(self): @@ -1888,7 +1856,7 @@ def __ne__(self, other): return not self == other -def _walk_up_rv(rv, formatting='plain'): +def _walk_up_rv(rv, formatting="plain"): """Walk up theano graph to get inputs for deterministic RV.""" all_rvs = [] parents = list(itertools.chain(*[j.inputs for j in rv.get_parents()])) @@ -1903,21 +1871,23 @@ def _walk_up_rv(rv, formatting='plain'): class DeterministicWrapper(tt.TensorVariable): - def _str_repr(self, formatting='plain'): - if formatting == 'latex': + def _str_repr(self, formatting="plain"): + if formatting == "latex": return r"$\text{{{name}}} \sim \text{{Deterministic}}({args})$".format( - name=self.name, args=r",~".join(_walk_up_rv(self, formatting=formatting))) + name=self.name, args=r",~".join(_walk_up_rv(self, formatting=formatting)) + ) else: return "{name} ~ Deterministic({args})".format( - name=self.name, args=", ".join(_walk_up_rv(self, formatting=formatting))) + name=self.name, args=", ".join(_walk_up_rv(self, formatting=formatting)) + ) def _repr_latex_(self): - return self._str_repr(formatting='latex') + return self._str_repr(formatting="latex") __latex__ = _repr_latex_ def __str__(self): - return self._str_repr(formatting='plain') + return self._str_repr(formatting="plain") def Deterministic(name, var, model=None, dims=None): @@ -1936,7 +1906,7 @@ def Deterministic(name, var, model=None, dims=None): var = var.copy(model.name_for(name)) model.deterministics.append(var) model.add_random_variable(var, dims) - var.__class__ = DeterministicWrapper # adds str and latex functionality + var.__class__ = DeterministicWrapper # adds str and latex functionality return var @@ -2030,7 +2000,7 @@ def as_iterargs(data): def all_continuous(vars): """Check that vars not include discrete variables, excepting - ObservedRVs. """ + ObservedRVs.""" vars_ = [var for var in vars if not isinstance(var, pm.model.ObservedRV)] if any([var.dtype in pm.discrete_types for var in vars_]): return False diff --git a/pymc3/sampling.py b/pymc3/sampling.py index c134555be2..f577a5aa8e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -657,7 +657,14 @@ def _check_start_shape(model, start): def _sample_many( - draws, chain: int, chains: int, start: list, random_seed: list, step, callback=None, **kwargs, + draws, + chain: int, + chains: int, + start: list, + random_seed: list, + step, + callback=None, + **kwargs, ): """Samples all chains sequentially. @@ -994,7 +1001,8 @@ def _iter_sample( if callback is not None: warns = getattr(step, "warnings", None) callback( - trace=strace, draw=Draw(chain, i == draws, i, i < tune, stats, point, warns), + trace=strace, + draw=Draw(chain, i == draws, i, i < tune, stats, point, warns), ) yield strace, diverging @@ -1947,7 +1955,13 @@ def sample_prior_predictive( def init_nuts( - init="auto", chains=1, n_init=500000, model=None, random_seed=None, progressbar=True, **kwargs, + init="auto", + chains=1, + n_init=500000, + model=None, + random_seed=None, + progressbar=True, + **kwargs, ): """Set up the mass matrix initialization for NUTS. 
diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py index 9f77277f48..fbb5db0d8c 100644 --- a/pymc3/tests/test_examples.py +++ b/pymc3/tests/test_examples.py @@ -24,46 +24,46 @@ from .helpers import SeededTest -if version.parse(matplotlib.__version__) < version.parse('3.3'): - matplotlib.use('Agg', warn=False) +if version.parse(matplotlib.__version__) < version.parse("3.3"): + matplotlib.use("Agg", warn=False) else: - matplotlib.use('Agg') + matplotlib.use("Agg") def get_city_data(): """Helper to get city data""" - data = pd.read_csv(pm.get_data('srrs2.dat')) - cty_data = pd.read_csv(pm.get_data('cty.dat')) + data = pd.read_csv(pm.get_data("srrs2.dat")) + cty_data = pd.read_csv(pm.get_data("cty.dat")) - data = data[data.state == 'MN'] + data = data[data.state == "MN"] - data['fips'] = data.stfips * 1000 + data.cntyfips - cty_data['fips'] = cty_data.stfips * 1000 + cty_data.ctfips - data['lradon'] = np.log(np.where(data.activity == 0, .1, data.activity)) - data = data.merge(cty_data, 'inner', on='fips') + data["fips"] = data.stfips * 1000 + data.cntyfips + cty_data["fips"] = cty_data.stfips * 1000 + cty_data.ctfips + data["lradon"] = np.log(np.where(data.activity == 0, 0.1, data.activity)) + data = data.merge(cty_data, "inner", on="fips") - unique = data[['fips']].drop_duplicates() - unique['group'] = np.arange(len(unique)) - unique.set_index('fips') - return data.merge(unique, 'inner', on='fips') + unique = data[["fips"]].drop_duplicates() + unique["group"] = np.arange(len(unique)) + unique.set_index("fips") + return data.merge(unique, "inner", on="fips") class TestARM5_4(SeededTest): def build_model(self): - data = pd.read_csv(pm.get_data('wells.dat'), - delimiter=' ', index_col='id', - dtype={'switch': np.int8}) + data = pd.read_csv( + pm.get_data("wells.dat"), delimiter=" ", index_col="id", dtype={"switch": np.int8} + ) data.dist /= 100 data.educ /= 4 col = data.columns P = data[col[1:]] P -= P.mean() - P['1'] = 1 + P["1"] = 1 with pm.Model() as model: - effects = pm.Normal('effects', mu=0, sigma=100, shape=len(P.columns)) + effects = pm.Normal("effects", mu=0, sigma=100, shape=len(P.columns)) logit_p = tt.dot(floatX(np.array(P)), effects) - pm.Bernoulli('s', logit_p=logit_p, observed=floatX(data.switch.values)) + pm.Bernoulli("s", logit_p=logit_p, observed=floatX(data.switch.values)) return model def test_run(self): @@ -76,32 +76,34 @@ class TestARM12_6(SeededTest): def build_model(self): data = get_city_data() - self.obs_means = data.groupby('fips').lradon.mean().to_numpy() + self.obs_means = data.groupby("fips").lradon.mean().to_numpy() lradon = data.lradon.to_numpy() floor = data.floor.to_numpy() group = data.group.to_numpy() with pm.Model() as model: - groupmean = pm.Normal('groupmean', 0, 10. ** -2.) - groupsd = pm.Uniform('groupsd', 0, 10.) - sd = pm.Uniform('sd', 0, 10.) - floor_m = pm.Normal('floor_m', 0, 5. ** -2.) 
- means = pm.Normal('means', groupmean, groupsd ** -2., shape=len(self.obs_means)) - pm.Normal('lr', floor * floor_m + means[group], sd ** -2., observed=lradon) + groupmean = pm.Normal("groupmean", 0, 10.0 ** -2.0) + groupsd = pm.Uniform("groupsd", 0, 10.0) + sd = pm.Uniform("sd", 0, 10.0) + floor_m = pm.Normal("floor_m", 0, 5.0 ** -2.0) + means = pm.Normal("means", groupmean, groupsd ** -2.0, shape=len(self.obs_means)) + pm.Normal("lr", floor * floor_m + means[group], sd ** -2.0, observed=lradon) return model def too_slow(self): model = self.build_model() - start = {'groupmean': self.obs_means.mean(), - 'groupsd_interval__': 0, - 'sd_interval__': 0, - 'means': self.obs_means, - 'floor_m': 0., - } + start = { + "groupmean": self.obs_means.mean(), + "groupsd_interval__": 0, + "sd_interval__": 0, + "means": self.obs_means, + "floor_m": 0.0, + } with model: - start = pm.find_MAP(start=start, - vars=[model['groupmean'], model['sd_interval__'], model['floor_m']]) + start = pm.find_MAP( + start=start, vars=[model["groupmean"], model["sd_interval__"], model["floor_m"]] + ) step = pm.NUTS(model.vars, scaling=start) pm.sample(50, step=step, start=start) @@ -109,7 +111,7 @@ def too_slow(self): class TestARM12_6Uranium(SeededTest): def build_model(self): data = get_city_data() - self.obs_means = data.groupby('fips').lradon.mean() + self.obs_means = data.groupby("fips").lradon.mean() lradon = data.lradon.to_numpy() floor = data.floor.to_numpy() @@ -117,27 +119,30 @@ def build_model(self): ufull = data.Uppm.to_numpy() with pm.Model() as model: - groupmean = pm.Normal('groupmean', 0, 10. ** -2.) - groupsd = pm.Uniform('groupsd', 0, 10.) - sd = pm.Uniform('sd', 0, 10.) - floor_m = pm.Normal('floor_m', 0, 5. ** -2.) - u_m = pm.Normal('u_m', 0, 5. ** -2) - means = pm.Normal('means', groupmean, groupsd ** -2., shape=len(self.obs_means)) - pm.Normal('lr', floor * floor_m + means[group] + ufull * u_m, sd ** - 2., - observed=lradon) + groupmean = pm.Normal("groupmean", 0, 10.0 ** -2.0) + groupsd = pm.Uniform("groupsd", 0, 10.0) + sd = pm.Uniform("sd", 0, 10.0) + floor_m = pm.Normal("floor_m", 0, 5.0 ** -2.0) + u_m = pm.Normal("u_m", 0, 5.0 ** -2) + means = pm.Normal("means", groupmean, groupsd ** -2.0, shape=len(self.obs_means)) + pm.Normal( + "lr", floor * floor_m + means[group] + ufull * u_m, sd ** -2.0, observed=lradon + ) return model def too_slow(self): model = self.build_model() with model: - start = pm.Point({ - 'groupmean': self.obs_means.mean(), - 'groupsd_interval__': 0, - 'sd_interval__': 0, - 'means': np.array(self.obs_means), - 'u_m': np.array([.72]), - 'floor_m': 0., - }) + start = pm.Point( + { + "groupmean": self.obs_means.mean(), + "groupsd_interval__": 0, + "sd_interval__": 0, + "means": np.array(self.obs_means), + "u_m": np.array([0.72]), + "floor_m": 0.0, + } + ) start = pm.find_MAP(start, model.vars[:-1]) H = model.fastd2logp() @@ -148,13 +153,121 @@ def too_slow(self): def build_disaster_model(masked=False): - disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, - 3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5, - 2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0, - 1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, - 0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2, - 3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]) + disasters_data = np.array( + [ + 4, + 5, + 4, + 0, + 1, + 4, + 3, + 4, + 0, + 6, + 3, + 3, + 4, + 0, + 2, + 6, + 3, + 3, + 5, + 4, + 5, + 3, + 1, + 4, + 4, + 1, + 5, + 5, + 3, + 4, + 2, + 5, + 2, + 2, + 3, + 4, + 
2, + 1, + 3, + 2, + 2, + 1, + 1, + 1, + 1, + 3, + 0, + 0, + 1, + 0, + 1, + 1, + 0, + 0, + 3, + 1, + 0, + 3, + 2, + 2, + 0, + 1, + 1, + 1, + 0, + 1, + 0, + 1, + 0, + 0, + 0, + 2, + 1, + 0, + 0, + 0, + 1, + 1, + 0, + 2, + 3, + 3, + 1, + 1, + 2, + 1, + 1, + 1, + 1, + 2, + 4, + 2, + 0, + 0, + 1, + 4, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 1, + ] + ) if masked: disasters_data[[23, 68]] = -1 disasters_data = np.ma.masked_values(disasters_data, value=-1) @@ -162,16 +275,16 @@ def build_disaster_model(masked=False): with pm.Model() as model: # Prior for distribution of switchpoint location - switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=years) + switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years) # Priors for pre- and post-switch mean number of disasters - early_mean = pm.Exponential('early_mean', lam=1.) - late_mean = pm.Exponential('late_mean', lam=1.) + early_mean = pm.Exponential("early_mean", lam=1.0) + late_mean = pm.Exponential("late_mean", lam=1.0) # Allocate appropriate Poisson rates to years before and after current # switchpoint location idx = np.arange(years) rate = tt.switch(switchpoint >= idx, early_mean, late_mean) # Data likelihood - pm.Poisson('disasters', rate, observed=disasters_data) + pm.Poisson("disasters", rate, observed=disasters_data) return model @@ -182,7 +295,7 @@ def test_disaster_model(self): model = build_disaster_model(masked=False) with model: # Initial values for stochastic nodes - start = {'early_mean': 2., 'late_mean': 3.} + start = {"early_mean": 2.0, "late_mean": 3.0} # Use slice sampler for means (other variables auto-selected) step = pm.Slice([model.early_mean_log__, model.late_mean_log__]) tr = pm.sample(500, tune=50, start=start, step=step, chains=2) @@ -192,7 +305,7 @@ def test_disaster_model_missing(self): model = build_disaster_model(masked=True) with model: # Initial values for stochastic nodes - start = {'early_mean': 2., 'late_mean': 3.} + start = {"early_mean": 2.0, "late_mean": 3.0} # Use slice sampler for means (other variables auto-selected) step = pm.Slice([model.early_mean_log__, model.late_mean_log__]) tr = pm.sample(500, tune=50, start=start, step=step, chains=2) @@ -205,10 +318,10 @@ def build_model(self): true_intercept = 1 true_slope = 2 self.x = np.linspace(0, 1, size) - self.y = true_intercept + self.x * true_slope + np.random.normal(scale=.5, size=size) + self.y = true_intercept + self.x * true_slope + np.random.normal(scale=0.5, size=size) data = dict(x=self.x, y=self.y) with pm.Model() as model: - pm.GLM.from_formula('y ~ x', data) + pm.GLM.from_formula("y ~ x", data) return model def test_run(self): @@ -249,6 +362,7 @@ class TestLatentOccupancy(SeededTest): Created by Chris Fonnesbeck on 2008-07-28. Copyright (c) 2008 University of Otago. All rights reserved. 
""" + def setup_method(self): super().setup_method() # Sample size @@ -258,36 +372,39 @@ def setup_method(self): # True occupancy pi = 0.4 # Simulate some data data - self.y = ((np.random.random(n) < pi) * np.random.poisson(lam=theta, size=n)).astype('int16') + self.y = ((np.random.random(n) < pi) * np.random.poisson(lam=theta, size=n)).astype("int16") def build_model(self): with pm.Model() as model: # Estimated occupancy - psi = pm.Beta('psi', 1, 1) + psi = pm.Beta("psi", 1, 1) # Latent variable for occupancy - pm.Bernoulli('z', psi, shape=self.y.shape) + pm.Bernoulli("z", psi, shape=self.y.shape) # Estimated mean count - theta = pm.Uniform('theta', 0, 100) + theta = pm.Uniform("theta", 0, 100) # Poisson likelihood - pm.ZeroInflatedPoisson('y', theta, psi, observed=self.y) + pm.ZeroInflatedPoisson("y", theta, psi, observed=self.y) return model def test_run(self): model = self.build_model() with model: start = { - 'psi': np.array(0.5, dtype='f'), - 'z': (self.y > 0).astype('int16'), - 'theta': np.array(5, dtype='f'), + "psi": np.array(0.5, dtype="f"), + "z": (self.y > 0).astype("int16"), + "theta": np.array(5, dtype="f"), } step_one = pm.Metropolis([model.theta_interval__, model.psi_logodds__]) step_two = pm.BinaryMetropolis([model.z]) pm.sample(50, step=[step_one, step_two], start=start, chains=1) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to starting inf at starting logP") +@pytest.mark.xfail( + condition=(theano.config.floatX == "float32"), + reason="Fails on float32 due to starting inf at starting logP", +) class TestRSV(SeededTest): - ''' + """ This model estimates the population prevalence of respiratory syncytial virus (RSV) among children in Amman, Jordan, based on 3 years of admissions diagnosed with RSV to Al Bashir hospital. @@ -298,7 +415,8 @@ class TestRSV(SeededTest): 1-year-olds) for the proportion of the population in the city, as well as for the market share of the hospital. The latter is based on expert esimate, and hence encoded as a prior. - ''' + """ + def build_model(self): # 1-year-old children in Jordan kids = np.array([180489, 191817, 190830]) @@ -308,15 +426,15 @@ def build_model(self): rsv_cases = np.array([40, 59, 65]) with pm.Model() as model: # Al Bashir hospital market share - market_share = pm.Uniform('market_share', 0.5, 0.6) + market_share = pm.Uniform("market_share", 0.5, 0.6) # Number of 1 y.o. 
in Amman - n_amman = pm.Binomial('n_amman', kids, amman_prop, shape=3) + n_amman = pm.Binomial("n_amman", kids, amman_prop, shape=3) # Prior probability - prev_rsv = pm.Beta('prev_rsv', 1, 5, shape=3) + prev_rsv = pm.Beta("prev_rsv", 1, 5, shape=3) # RSV in Amman - y_amman = pm.Binomial('y_amman', n_amman, prev_rsv, shape=3, testval=100) + y_amman = pm.Binomial("y_amman", n_amman, prev_rsv, shape=3, testval=100) # Likelihood for number with RSV in hospital (assumes Pr(hosp | RSV) = 1) - pm.Binomial('y_hosp', y_amman, market_share, observed=rsv_cases) + pm.Binomial("y_hosp", y_amman, market_share, observed=rsv_cases) return model def test_run(self): @@ -342,20 +460,20 @@ def build_models(self): with pm.Model() as model_coarse_0: sigma = 1.0 - x_coeff = pm.Normal('x', true_mean, sigma=10.0) - pm.Normal('y', mu=x_coeff, sigma=sigma, observed=y + 1.0) + x_coeff = pm.Normal("x", true_mean, sigma=10.0) + pm.Normal("y", mu=x_coeff, sigma=sigma, observed=y + 1.0) with pm.Model() as model_coarse_1: sigma = 1.0 - x_coeff = pm.Normal('x', true_mean, sigma=10.0) - pm.Normal('y', mu=x_coeff, sigma=sigma, observed=y + 0.5) + x_coeff = pm.Normal("x", true_mean, sigma=10.0) + pm.Normal("y", mu=x_coeff, sigma=sigma, observed=y + 0.5) coarse_models = [model_coarse_0, model_coarse_1] with pm.Model() as model: sigma = 1.0 - x_coeff = pm.Normal('x', true_mean, sigma=10.0) - pm.Normal('y', mu=x_coeff, sigma=sigma, observed=y) + x_coeff = pm.Normal("x", true_mean, sigma=10.0) + pm.Normal("y", mu=x_coeff, sigma=sigma, observed=y) return model, coarse_models diff --git a/pymc3/tests/test_math.py b/pymc3/tests/test_math.py index c24ac3bdc4..e0326cfcf8 100644 --- a/pymc3/tests/test_math.py +++ b/pymc3/tests/test_math.py @@ -43,9 +43,7 @@ def test_kronecker(): custom = kronecker(a, b, c) # Custom version nested = tt.slinalg.kron(a, tt.slinalg.kron(b, c)) - np.testing.assert_array_almost_equal( - custom.eval(), nested.eval() # Standard nested version - ) + np.testing.assert_array_almost_equal(custom.eval(), nested.eval()) # Standard nested version def test_cartesian(): @@ -203,17 +201,9 @@ def test_expand_packed_triangular(): upper_packed = floatX(vals[upper != 0]) expand_lower = expand_packed_triangular(N, packed, lower=True) expand_upper = expand_packed_triangular(N, packed, lower=False) - expand_diag_lower = expand_packed_triangular( - N, packed, lower=True, diagonal_only=True - ) - expand_diag_upper = expand_packed_triangular( - N, packed, lower=False, diagonal_only=True - ) + expand_diag_lower = expand_packed_triangular(N, packed, lower=True, diagonal_only=True) + expand_diag_upper = expand_packed_triangular(N, packed, lower=False, diagonal_only=True) assert np.all(expand_lower.eval({packed: lower_packed}) == lower) assert np.all(expand_upper.eval({packed: upper_packed}) == upper) - assert np.all( - expand_diag_lower.eval({packed: lower_packed}) == floatX(np.diag(vals)) - ) - assert np.all( - expand_diag_upper.eval({packed: upper_packed}) == floatX(np.diag(vals)) - ) + assert np.all(expand_diag_lower.eval({packed: lower_packed}) == floatX(np.diag(vals))) + assert np.all(expand_diag_upper.eval({packed: upper_packed}) == floatX(np.diag(vals))) diff --git a/pymc3/tests/test_minibatches.py b/pymc3/tests/test_minibatches.py index 675c0c3977..e2254da847 100644 --- a/pymc3/tests/test_minibatches.py +++ b/pymc3/tests/test_minibatches.py @@ -31,8 +31,9 @@ class _DataSampler: """ Not for users """ - def __init__(self, data, batchsize=50, random_seed=42, dtype='floatX'): - self.dtype = theano.config.floatX if dtype == 
'floatX' else dtype + + def __init__(self, data, batchsize=50, random_seed=42, dtype="floatX"): + self.dtype = theano.config.floatX if dtype == "floatX" else dtype self.rng = np.random.RandomState(random_seed) self.data = data self.n = batchsize @@ -41,17 +42,15 @@ def __iter__(self): return self def __next__(self): - idx = (self.rng - .uniform(size=self.n, - low=0.0, - high=self.data.shape[0] - 1e-16) - .astype('int64')) + idx = self.rng.uniform(size=self.n, low=0.0, high=self.data.shape[0] - 1e-16).astype( + "int64" + ) return np.asarray(self.data[idx], self.dtype) next = __next__ -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def datagen(): return _DataSampler(np.random.uniform(size=(1000, 10))) @@ -70,9 +69,8 @@ def integers_ndim(ndim): i += 1 -@pytest.mark.usefixtures('strict_float32') +@pytest.mark.usefixtures("strict_float32") class TestGenerator: - def test_basic(self): generator = GeneratorAdapter(integers()) gop = GeneratorOp(generator)() @@ -144,7 +142,7 @@ def test_gen_cloning_with_shape_change(self, datagen): res, _ = theano.scan(lambda x: x.sum(), X, n_steps=X.shape[0]) assert res.eval().shape == (50,) shared = theano.shared(datagen.data.astype(gen.dtype)) - res2 = theano.clone(res, {gen: shared**2}) + res2 = theano.clone(res, {gen: shared ** 2}) assert res2.eval().shape == (1000,) @@ -166,13 +164,14 @@ class TestScaling: """ Related to minibatch training """ + def test_density_scaling(self): with pm.Model() as model1: - Normal('n', observed=[[1]], total_size=1) + Normal("n", observed=[[1]], total_size=1) p1 = theano.function([], model1.logpt) with pm.Model() as model2: - Normal('n', observed=[[1]], total_size=2) + Normal("n", observed=[[1]], total_size=2) p2 = theano.function([], model2.logpt) assert p1() * 2 == p2() @@ -183,15 +182,16 @@ def true_dens(): g = gen1() for i, point in enumerate(g): yield stats.norm.logpdf(point).sum() * 10 + t = true_dens() # We have same size models with pm.Model() as model1: - Normal('n', observed=gen1(), total_size=100) + Normal("n", observed=gen1(), total_size=100) p1 = theano.function([], model1.logpt) with pm.Model() as model2: gen_var = generator(gen2()) - Normal('n', observed=gen_var, total_size=100) + Normal("n", observed=gen_var, total_size=100) p2 = theano.function([], model2.logpt) for i in range(10): @@ -204,13 +204,13 @@ def true_dens(): def test_gradient_with_scaling(self): with pm.Model() as model1: genvar = generator(gen1()) - m = Normal('m') - Normal('n', observed=genvar, total_size=1000) + m = Normal("m") + Normal("n", observed=genvar, total_size=1000) grad1 = theano.function([m], tt.grad(model1.logpt, m)) with pm.Model() as model2: - m = Normal('m') + m = Normal("m") shavar = theano.shared(np.ones((1000, 100))) - Normal('n', observed=shavar) + Normal("n", observed=shavar) grad2 = theano.function([m], tt.grad(model2.logpt, m)) for i in range(10): @@ -221,85 +221,80 @@ def test_gradient_with_scaling(self): def test_multidim_scaling(self): with pm.Model() as model0: - Normal('n', observed=[[1, 1], - [1, 1]], total_size=[]) + Normal("n", observed=[[1, 1], [1, 1]], total_size=[]) p0 = theano.function([], model0.logpt) with pm.Model() as model1: - Normal('n', observed=[[1, 1], - [1, 1]], total_size=[2, 2]) + Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2]) p1 = theano.function([], model1.logpt) with pm.Model() as model2: - Normal('n', observed=[[1], - [1]], total_size=[2, 2]) + Normal("n", observed=[[1], [1]], total_size=[2, 2]) p2 = theano.function([], model2.logpt) with pm.Model() as model3: - 
Normal('n', observed=[[1, 1]], total_size=[2, 2]) + Normal("n", observed=[[1, 1]], total_size=[2, 2]) p3 = theano.function([], model3.logpt) with pm.Model() as model4: - Normal('n', observed=[[1]], total_size=[2, 2]) + Normal("n", observed=[[1]], total_size=[2, 2]) p4 = theano.function([], model4.logpt) with pm.Model() as model5: - Normal('n', observed=[[1]], total_size=[2, Ellipsis, 2]) + Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2]) p5 = theano.function([], model5.logpt) _p0 = p0() assert ( - np.allclose(_p0, p1()) and - np.allclose(_p0, p2()) and - np.allclose(_p0, p3()) and - np.allclose(_p0, p4()) and - np.allclose(_p0, p5()) + np.allclose(_p0, p1()) + and np.allclose(_p0, p2()) + and np.allclose(_p0, p3()) + and np.allclose(_p0, p4()) + and np.allclose(_p0, p5()) ) def test_common_errors(self): with pm.Model(): with pytest.raises(ValueError) as e: - Normal('n', observed=[[1]], total_size=[2, Ellipsis, 2, 2]) - assert 'Length of' in str(e.value) + Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2, 2]) + assert "Length of" in str(e.value) with pytest.raises(ValueError) as e: - Normal('n', observed=[[1]], total_size=[2, 2, 2]) - assert 'Length of' in str(e.value) + Normal("n", observed=[[1]], total_size=[2, 2, 2]) + assert "Length of" in str(e.value) with pytest.raises(TypeError) as e: - Normal('n', observed=[[1]], total_size='foo') - assert 'Unrecognized' in str(e.value) + Normal("n", observed=[[1]], total_size="foo") + assert "Unrecognized" in str(e.value) with pytest.raises(TypeError) as e: - Normal('n', observed=[[1]], total_size=['foo']) - assert 'Unrecognized' in str(e.value) + Normal("n", observed=[[1]], total_size=["foo"]) + assert "Unrecognized" in str(e.value) with pytest.raises(ValueError) as e: - Normal('n', observed=[[1]], total_size=[Ellipsis, Ellipsis]) - assert 'Double Ellipsis' in str(e.value) + Normal("n", observed=[[1]], total_size=[Ellipsis, Ellipsis]) + assert "Double Ellipsis" in str(e.value) def test_mixed1(self): with pm.Model(): data = np.random.rand(10, 20, 30, 40, 50) mb = pm.Minibatch(data, [2, None, 20, Ellipsis, 10]) - Normal('n', observed=mb, total_size=(10, None, 30, Ellipsis, 50)) + Normal("n", observed=mb, total_size=(10, None, 30, Ellipsis, 50)) def test_mixed2(self): with pm.Model(): data = np.random.rand(10, 20, 30, 40, 50) mb = pm.Minibatch(data, [2, None, 20]) - Normal('n', observed=mb, total_size=(10, None, 30)) + Normal("n", observed=mb, total_size=(10, None, 30)) def test_free_rv(self): with pm.Model() as model4: - Normal('n', observed=[[1, 1], - [1, 1]], total_size=[2, 2]) + Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2]) p4 = theano.function([], model4.logpt) with pm.Model() as model5: - Normal('n', total_size=[2, Ellipsis, 2], shape=(1, 1), broadcastable=(False, False)) + Normal("n", total_size=[2, Ellipsis, 2], shape=(1, 1), broadcastable=(False, False)) p5 = theano.function([model5.n], model5.logpt) assert p4() == p5(pm.floatX([[1]])) - assert p4() == p5(pm.floatX([[1, 1], - [1, 1]])) + assert p4() == p5(pm.floatX([[1, 1], [1, 1]])) -@pytest.mark.usefixtures('strict_float32') +@pytest.mark.usefixtures("strict_float32") class TestMinibatch: data = np.random.rand(30, 10, 40, 10, 50) diff --git a/pymc3/tests/test_model_func.py b/pymc3/tests/test_model_func.py index 9636d22c1d..5c167c099a 100644 --- a/pymc3/tests/test_model_func.py +++ b/pymc3/tests/test_model_func.py @@ -19,42 +19,43 @@ from .models import simple_model, mv_simple -tol = 2.0**-11 +tol = 2.0 ** -11 + def test_logp(): start, model, (mu, sig) = 
simple_model() lp = model.fastlogp lp(start) - close_to(lp(start), sp.norm.logpdf(start['x'], mu, sig).sum(), tol) + close_to(lp(start), sp.norm.logpdf(start["x"], mu, sig).sum(), tol) def test_dlogp(): start, model, (mu, sig) = simple_model() dlogp = model.fastdlogp() - close_to(dlogp(start), -(start['x'] - mu) / sig**2, 1. / sig**2 / 100.) + close_to(dlogp(start), -(start["x"] - mu) / sig ** 2, 1.0 / sig ** 2 / 100.0) def test_dlogp2(): start, model, (_, sig) = mv_simple() H = np.linalg.inv(sig) d2logp = model.fastd2logp() - close_to(d2logp(start), H, np.abs(H / 100.)) + close_to(d2logp(start), H, np.abs(H / 100.0)) def test_deterministic(): with pm.Model() as model: - x = pm.Normal('x', 0, 1) - y = pm.Deterministic('y', x**2) + x = pm.Normal("x", 0, 1) + y = pm.Deterministic("y", x ** 2) assert model.y == y - assert model['y'] == y + assert model["y"] == y def test_mapping(): with pm.Model() as model: - mu = pm.Normal('mu', 0, 1) - sd = pm.Gamma('sd', 1, 1) - y = pm.Normal('y', mu, sd, observed=np.array([.1, .5])) + mu = pm.Normal("mu", 0, 1) + sd = pm.Gamma("sd", 1, 1) + y = pm.Normal("y", mu, sd, observed=np.array([0.1, 0.5])) lp = model.fastlogp lparray = model.logp_array point = model.test_point diff --git a/pymc3/tests/test_model_graph.py b/pymc3/tests/test_model_graph.py index 56c8c82a1e..4ecf10c8b5 100644 --- a/pymc3/tests/test_model_graph.py +++ b/pymc3/tests/test_model_graph.py @@ -23,7 +23,7 @@ def radon_model(): """Similar in shape to the Radon model""" n_homes = 919 counties = 85 - uranium = np.random.normal(-.1, 0.4, size=n_homes) + uranium = np.random.normal(-0.1, 0.4, size=n_homes) xbar = np.random.normal(1, 0.1, size=n_homes) floor_measure = np.random.randint(0, 2, size=n_homes) log_radon = np.random.normal(1, 1, size=n_homes) @@ -31,36 +31,33 @@ def radon_model(): floor_measure = th.shared(floor_measure) d, r = divmod(919, 85) - county = np.hstack(( - np.tile(np.arange(counties, dtype=int), d), - np.arange(r) - )) + county = np.hstack((np.tile(np.arange(counties, dtype=int), d), np.arange(r))) with pm.Model() as model: - sigma_a = pm.HalfCauchy('sigma_a', 5) - gamma = pm.Normal('gamma', mu=0., sigma=1e5, shape=3) - mu_a = pm.Deterministic('mu_a', gamma[0] + gamma[1]*uranium + gamma[2]*xbar) - eps_a = pm.Normal('eps_a', mu=0, sigma=sigma_a, shape=counties) - a = pm.Deterministic('a', mu_a + eps_a[county]) - b = pm.Normal('b', mu=0., sigma=1e15) - sigma_y = pm.Uniform('sigma_y', lower=0, upper=100) + sigma_a = pm.HalfCauchy("sigma_a", 5) + gamma = pm.Normal("gamma", mu=0.0, sigma=1e5, shape=3) + mu_a = pm.Deterministic("mu_a", gamma[0] + gamma[1] * uranium + gamma[2] * xbar) + eps_a = pm.Normal("eps_a", mu=0, sigma=sigma_a, shape=counties) + a = pm.Deterministic("a", mu_a + eps_a[county]) + b = pm.Normal("b", mu=0.0, sigma=1e15) + sigma_y = pm.Uniform("sigma_y", lower=0, upper=100) y_hat = a + b * floor_measure - y_like = pm.Normal('y_like', mu=y_hat, sigma=sigma_y, observed=log_radon) + y_like = pm.Normal("y_like", mu=y_hat, sigma=sigma_y, observed=log_radon) compute_graph = { - 'sigma_a': set(), - 'gamma': set(), - 'mu_a': {'gamma'}, - 'eps_a': {'sigma_a'}, - 'a': {'mu_a', 'eps_a'}, - 'b': set(), - 'sigma_y': set(), - 'y_like': {'a', 'b', 'sigma_y'} + "sigma_a": set(), + "gamma": set(), + "mu_a": {"gamma"}, + "eps_a": {"sigma_a"}, + "a": {"mu_a", "eps_a"}, + "b": set(), + "sigma_y": set(), + "y_like": {"a", "b", "sigma_y"}, } plates = { - (): {'b', 'sigma_a', 'sigma_y'}, - (3,): {'gamma'}, - (85,): {'eps_a'}, - (919,): {'a', 'mu_a', 'y_like'}, + (): {"b", "sigma_a", 
"sigma_y"}, + (3,): {"gamma"}, + (85,): {"eps_a"}, + (919,): {"a", "mu_a", "y_like"}, } return model, compute_graph, plates diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py index a8f546de4d..676cd75e7a 100644 --- a/pymc3/tests/test_model_helpers.py +++ b/pymc3/tests/test_model_helpers.py @@ -34,21 +34,20 @@ def test_pandas_to_array(self): sparse_input = sps.csr_matrix(np.eye(3)) dense_input = np.arange(9).reshape((3, 3)) - input_name = 'input_variable' + input_name = "input_variable" theano_graph_input = tt.as_tensor(dense_input, name=input_name) pandas_input = pd.DataFrame(dense_input) # All the even numbers are replaced with NaN - missing_pandas_input = pd.DataFrame(np.array([[np.nan, 1, np.nan], - [3, np.nan, 5], - [np.nan, 7, np.nan]])) - masked_array_input = ma.array(dense_input, - mask=(np.mod(dense_input, 2) == 0)) + missing_pandas_input = pd.DataFrame( + np.array([[np.nan, 1, np.nan], [3, np.nan, 5], [np.nan, 7, np.nan]]) + ) + masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0)) # Create a generator object. Apparently the generator object needs to # yield numpy arrays. - square_generator = (np.array([i**2], dtype=int) for i in range(100)) + square_generator = (np.array([i ** 2], dtype=int) for i in range(100)) # Alias the function to be tested func = pm.model.pandas_to_array @@ -68,8 +67,7 @@ def test_pandas_to_array(self): sparse_output = func(sparse_input) assert sps.issparse(sparse_output) assert sparse_output.shape == sparse_input.shape - npt.assert_allclose(sparse_output.toarray(), - sparse_input.toarray()) + npt.assert_allclose(sparse_output.toarray(), sparse_input.toarray()) # Check function behavior when using masked array inputs and pandas # objects with missing data @@ -103,11 +101,10 @@ def test_as_tensor(self): should return a Sparse Theano object. 
""" # Create the various inputs to the function - input_name = 'testing_inputs' + input_name = "testing_inputs" sparse_input = sps.csr_matrix(np.eye(3)) dense_input = np.arange(9).reshape((3, 3)) - masked_array_input = ma.array(dense_input, - mask=(np.mod(dense_input, 2) == 0)) + masked_array_input = ma.array(dense_input, mask=(np.mod(dense_input, 2) == 0)) # Create a fake model and fake distribution to be used for the test fake_model = pm.Model() @@ -120,18 +117,9 @@ def test_as_tensor(self): func = pm.model.as_tensor # Check function behavior using the various inputs - dense_output = func(dense_input, - input_name, - fake_model, - fake_distribution) - sparse_output = func(sparse_input, - input_name, - fake_model, - fake_distribution) - masked_output = func(masked_array_input, - input_name, - fake_model, - fake_distribution) + dense_output = func(dense_input, input_name, fake_model, fake_distribution) + sparse_output = func(sparse_input, input_name, fake_model, fake_distribution) + masked_output = func(masked_array_input, input_name, fake_model, fake_distribution) # Ensure that the missing values are appropriately set to None for func_output in [dense_output, sparse_output]: diff --git a/pymc3/tests/test_modelcontext.py b/pymc3/tests/test_modelcontext.py index 4b0233f708..d7cc4a6166 100644 --- a/pymc3/tests/test_modelcontext.py +++ b/pymc3/tests/test_modelcontext.py @@ -21,7 +21,7 @@ class TestModelContext: def test_thread_safety(self): - """ Regression test for issue #1552: Thread safety of model context manager + """Regression test for issue #1552: Thread safety of model context manager This test creates two threads that attempt to construct two unrelated models at the same time. @@ -29,21 +29,24 @@ def test_thread_safety(self): that thread A enters the context manager first, then B, then A attempts to declare a variable while B is still in the context manager. """ - aInCtxt,bInCtxt,aDone = [threading.Event() for _ in range(3)] + aInCtxt, bInCtxt, aDone = [threading.Event() for _ in range(3)] modelA = Model() modelB = Model() + def make_model_a(): with modelA: aInCtxt.set() bInCtxt.wait() - Normal('a',0,1) + Normal("a", 0, 1) aDone.set() + def make_model_b(): aInCtxt.wait() with modelB: bInCtxt.set() aDone.wait() - Normal('b', 0, 1) + Normal("b", 0, 1) + threadA = threading.Thread(target=make_model_a) threadB = threading.Thread(target=make_model_b) threadA.start() @@ -56,9 +59,10 @@ def make_model_b(): # - A leaves it's model context before B attempts to declare b. 
A's context manager # takes B from the stack, such that b ends up in model A assert ( - list(modelA.named_vars), - list(modelB.named_vars), - ) == (['a'],['b']) + list(modelA.named_vars), + list(modelB.named_vars), + ) == (["a"], ["b"]) + def test_mixed_contexts(): modelA = Model() diff --git a/pymc3/tests/test_models_linear.py b/pymc3/tests/test_models_linear.py index 5a6180631a..b95795a35a 100644 --- a/pymc3/tests/test_models_linear.py +++ b/pymc3/tests/test_models_linear.py @@ -32,7 +32,7 @@ def setup_class(cls): super().setup_class() cls.intercept = 1 cls.slope = 3 - cls.sd = .05 + cls.sd = 0.05 x_linear, cls.y_linear = generate_data(cls.intercept, cls.slope, size=1000) cls.y_linear += np.random.normal(size=1000, scale=cls.sd) cls.data_linear = dict(x=x_linear, y=cls.y_linear) @@ -43,86 +43,68 @@ def setup_class(cls): cls.data_logistic = dict(x=x_logistic, y=bern_trials) def test_linear_component(self): - vars_to_create = { - 'sigma', - 'sigma_interval__', - 'y_obs', - 'lm_x0', - 'lm_Intercept' - } + vars_to_create = {"sigma", "sigma_interval__", "y_obs", "lm_x0", "lm_Intercept"} with Model() as model: lm = LinearComponent( - self.data_linear['x'], - self.data_linear['y'], - name='lm' - ) # yields lm_x0, lm_Intercept - sigma = Uniform('sigma', 0, 20) # yields sigma_interval__ - Normal('y_obs', mu=lm.y_est, sigma=sigma, observed=self.y_linear) # yields y_obs + self.data_linear["x"], self.data_linear["y"], name="lm" + ) # yields lm_x0, lm_Intercept + sigma = Uniform("sigma", 0, 20) # yields sigma_interval__ + Normal("y_obs", mu=lm.y_est, sigma=sigma, observed=self.y_linear) # yields y_obs start = find_MAP(vars=[sigma]) step = Slice(model.vars) - trace = sample(500, tune=0, step=step, start=start, - progressbar=False, random_seed=self.random_seed) + trace = sample( + 500, tune=0, step=step, start=start, progressbar=False, random_seed=self.random_seed + ) - assert round(abs(np.mean(trace['lm_Intercept'])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['lm_x0'])-self.slope), 1) == 0 - assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0 + assert round(abs(np.mean(trace["lm_Intercept"]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["lm_x0"]) - self.slope), 1) == 0 + assert round(abs(np.mean(trace["sigma"]) - self.sd), 1) == 0 assert vars_to_create == set(model.named_vars.keys()) def test_linear_component_from_formula(self): with Model() as model: - lm = LinearComponent.from_formula('y ~ x', self.data_linear) - sigma = Uniform('sigma', 0, 20) - Normal('y_obs', mu=lm.y_est, sigma=sigma, observed=self.y_linear) + lm = LinearComponent.from_formula("y ~ x", self.data_linear) + sigma = Uniform("sigma", 0, 20) + Normal("y_obs", mu=lm.y_est, sigma=sigma, observed=self.y_linear) start = find_MAP(vars=[sigma]) step = Slice(model.vars) - trace = sample(500, tune=0, step=step, start=start, - progressbar=False, - random_seed=self.random_seed) + trace = sample( + 500, tune=0, step=step, start=start, progressbar=False, random_seed=self.random_seed + ) - assert round(abs(np.mean(trace['Intercept'])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['x'])-self.slope), 1) == 0 - assert round(abs(np.mean(trace['sigma'])-self.sd), 1) == 0 + assert round(abs(np.mean(trace["Intercept"]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["x"]) - self.slope), 1) == 0 + assert round(abs(np.mean(trace["sigma"]) - self.sd), 1) == 0 def test_glm(self): with Model() as model: - vars_to_create = { - 'glm_sd', - 'glm_sd_log__', - 'glm_y', - 'glm_x0', - 'glm_Intercept' - } - 
GLM( - self.data_linear['x'], - self.data_linear['y'], - name='glm' - ) + vars_to_create = {"glm_sd", "glm_sd_log__", "glm_y", "glm_x0", "glm_Intercept"} + GLM(self.data_linear["x"], self.data_linear["y"], name="glm") start = find_MAP() step = Slice(model.vars) - trace = sample(500, tune=0, step=step, start=start, - progressbar=False, random_seed=self.random_seed) - assert round(abs(np.mean(trace['glm_Intercept'])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['glm_x0'])-self.slope), 1) == 0 - assert round(abs(np.mean(trace['glm_sd'])-self.sd), 1) == 0 + trace = sample( + 500, tune=0, step=step, start=start, progressbar=False, random_seed=self.random_seed + ) + assert round(abs(np.mean(trace["glm_Intercept"]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["glm_x0"]) - self.slope), 1) == 0 + assert round(abs(np.mean(trace["glm_sd"]) - self.sd), 1) == 0 assert vars_to_create == set(model.named_vars.keys()) def test_glm_from_formula(self): with Model() as model: - NAME = 'glm' - GLM.from_formula('y ~ x', self.data_linear, name=NAME) + NAME = "glm" + GLM.from_formula("y ~ x", self.data_linear, name=NAME) start = find_MAP() step = Slice(model.vars) - trace = sample(500, tune=0, step=step, start=start, - progressbar=False, random_seed=self.random_seed) + trace = sample( + 500, tune=0, step=step, start=start, progressbar=False, random_seed=self.random_seed + ) - assert round(abs(np.mean(trace['%s_Intercept' % NAME])-self.intercept), 1) == 0 - assert round(abs(np.mean(trace['%s_x' % NAME])-self.slope), 1) == 0 - assert round(abs(np.mean(trace['%s_sd' % NAME])-self.sd), 1) == 0 + assert round(abs(np.mean(trace["%s_Intercept" % NAME]) - self.intercept), 1) == 0 + assert round(abs(np.mean(trace["%s_x" % NAME]) - self.slope), 1) == 0 + assert round(abs(np.mean(trace["%s_sd" % NAME]) - self.sd), 1) == 0 def test_strange_types(self): with Model(): - with pytest.raises( - ValueError): - GLM(1, - self.data_linear['y'], - name='lm') + with pytest.raises(ValueError): + GLM(1, self.data_linear["y"], name="lm") diff --git a/pymc3/tests/test_models_utils.py b/pymc3/tests/test_models_utils.py index 96e940d4a1..4b1e5f42bc 100644 --- a/pymc3/tests/test_models_utils.py +++ b/pymc3/tests/test_models_utils.py @@ -24,65 +24,56 @@ def setup_method(self): self.data = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6])) def assertMatrixLabels(self, m, l, mt=None, lt=None): - assert np.all( - np.equal( - m.eval(), - mt if mt is not None else self.data.values - ) - ) + assert np.all(np.equal(m.eval(), mt if mt is not None else self.data.values)) assert l == list(lt or self.data.columns) def test_numpy_init(self): m, l = utils.any_to_tensor_and_labels(self.data.values) - self.assertMatrixLabels(m, l, lt=['x0', 'x1']) - m, l = utils.any_to_tensor_and_labels(self.data.values, labels=['x2', 'x3']) - self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=["x0", "x1"]) + m, l = utils.any_to_tensor_and_labels(self.data.values, labels=["x2", "x3"]) + self.assertMatrixLabels(m, l, lt=["x2", "x3"]) def test_pandas_init(self): m, l = utils.any_to_tensor_and_labels(self.data) self.assertMatrixLabels(m, l) - m, l = utils.any_to_tensor_and_labels(self.data, labels=['x2', 'x3']) - self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + m, l = utils.any_to_tensor_and_labels(self.data, labels=["x2", "x3"]) + self.assertMatrixLabels(m, l, lt=["x2", "x3"]) @pytest.mark.xfail def test_dict_input(self): - m, l = utils.any_to_tensor_and_labels(self.data.to_dict('dict')) + m, l = 
utils.any_to_tensor_and_labels(self.data.to_dict("dict")) self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l) - m, l = utils.any_to_tensor_and_labels(self.data.to_dict('series')) + m, l = utils.any_to_tensor_and_labels(self.data.to_dict("series")) self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l) - m, l = utils.any_to_tensor_and_labels(self.data.to_dict('list')) + m, l = utils.any_to_tensor_and_labels(self.data.to_dict("list")) self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l) - inp = {k: tt.as_tensor_variable(v.values) for k, v in self.data.to_dict('series').items()} + inp = {k: tt.as_tensor_variable(v.values) for k, v in self.data.to_dict("series").items()} m, l = utils.any_to_tensor_and_labels(inp) self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l) def test_list_input(self): m, l = utils.any_to_tensor_and_labels(self.data.values.tolist()) - self.assertMatrixLabels(m, l, lt=['x0', 'x1']) - m, l = utils.any_to_tensor_and_labels(self.data.values.tolist(), labels=['x2', 'x3']) - self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + self.assertMatrixLabels(m, l, lt=["x0", "x1"]) + m, l = utils.any_to_tensor_and_labels(self.data.values.tolist(), labels=["x2", "x3"]) + self.assertMatrixLabels(m, l, lt=["x2", "x3"]) def test_tensor_input(self): m, l = utils.any_to_tensor_and_labels( - tt.as_tensor_variable(self.data.values.tolist()), - labels=['x0', 'x1'] + tt.as_tensor_variable(self.data.values.tolist()), labels=["x0", "x1"] ) - self.assertMatrixLabels(m, l, lt=['x0', 'x1']) + self.assertMatrixLabels(m, l, lt=["x0", "x1"]) m, l = utils.any_to_tensor_and_labels( - tt.as_tensor_variable(self.data.values.tolist()), - labels=['x2', 'x3']) - self.assertMatrixLabels(m, l, lt=['x2', 'x3']) + tt.as_tensor_variable(self.data.values.tolist()), labels=["x2", "x3"] + ) + self.assertMatrixLabels(m, l, lt=["x2", "x3"]) def test_user_mistakes(self): # no labels for tensor variable - with pytest.raises( - ValueError): + with pytest.raises(ValueError): utils.any_to_tensor_and_labels(tt.as_tensor_variable(self.data.values.tolist())) # len of labels is bad - with pytest.raises( - ValueError): - utils.any_to_tensor_and_labels(self.data.values.tolist(), - labels=['x']) + with pytest.raises(ValueError): + utils.any_to_tensor_and_labels(self.data.values.tolist(), labels=["x"]) diff --git a/pymc3/tests/test_parallel_sampling.py b/pymc3/tests/test_parallel_sampling.py index e61007ac8d..b5de8332cc 100644 --- a/pymc3/tests/test_parallel_sampling.py +++ b/pymc3/tests/test_parallel_sampling.py @@ -24,8 +24,8 @@ def test_context(): with pm.Model(): - pm.Normal('x') - ctx = multiprocessing.get_context('spawn') + pm.Normal("x") + ctx = multiprocessing.get_context("spawn") pm.sample(tune=2, draws=2, chains=2, cores=2, mp_ctx=ctx) @@ -39,15 +39,22 @@ def __setstate__(self, state): def test_bad_unpickle(): with pm.Model() as model: - pm.Normal('x') + pm.Normal("x") with model: step = pm.NUTS() step.no_unpickle = NoUnpickle() with pytest.raises(Exception) as exc_info: - pm.sample(tune=2, draws=2, mp_ctx='spawn', step=step, - cores=2, chains=2, compute_convergence_checks=False) - assert 'could not be unpickled' in str(exc_info.getrepr(style='short')) + pm.sample( + tune=2, + draws=2, + mp_ctx="spawn", + step=step, + cores=2, + chains=2, + compute_convergence_checks=False, + ) + assert "could not be unpickled" in str(exc_info.getrepr(style="short")) tt_vector = tt.TensorType(theano.config.floatX, [False]) @@ -62,35 +69,43 @@ def _crash_remote_process(a, master_pid): def test_dill(): with pm.Model(): 
- pm.Normal('x') + pm.Normal("x") pm.sample(tune=1, draws=1, chains=2, cores=2, pickle_backend="dill", mp_ctx="spawn") def test_remote_pipe_closed(): master_pid = os.getpid() with pm.Model(): - x = pm.Normal('x', shape=2, mu=0.1) - tt_pid = tt.as_tensor_variable(np.array(master_pid, dtype='int32')) - pm.Normal('y', mu=_crash_remote_process(x, tt_pid), shape=2) + x = pm.Normal("x", shape=2, mu=0.1) + tt_pid = tt.as_tensor_variable(np.array(master_pid, dtype="int32")) + pm.Normal("y", mu=_crash_remote_process(x, tt_pid), shape=2) step = pm.Metropolis() with pytest.raises(RuntimeError, match="Chain [0-9] failed"): - pm.sample(step=step, mp_ctx='spawn', tune=2, draws=2, cores=2, chains=2) + pm.sample(step=step, mp_ctx="spawn", tune=2, draws=2, cores=2, chains=2) def test_abort(): with pm.Model() as model: - a = pm.Normal('a', shape=1) - pm.HalfNormal('b') + a = pm.Normal("a", shape=1) + pm.HalfNormal("b") step1 = pm.NUTS([a]) step2 = pm.Metropolis([model.b_log__]) step = pm.CompoundStep([step1, step2]) ctx = multiprocessing.get_context() - proc = ps.ProcessAdapter(10, 10, step, chain=3, seed=1, mp_ctx=ctx, - start={'a': 1., 'b_log__': 2.}, - step_method_pickled=None, pickle_backend='pickle') + proc = ps.ProcessAdapter( + 10, + 10, + step, + chain=3, + seed=1, + mp_ctx=ctx, + start={"a": 1.0, "b_log__": 2.0}, + step_method_pickled=None, + pickle_backend="pickle", + ) proc.start() proc.write_next() proc.abort() @@ -99,17 +114,25 @@ def test_abort(): def test_explicit_sample(): with pm.Model() as model: - a = pm.Normal('a', shape=1) - pm.HalfNormal('b') + a = pm.Normal("a", shape=1) + pm.HalfNormal("b") step1 = pm.NUTS([a]) step2 = pm.Metropolis([model.b_log__]) step = pm.CompoundStep([step1, step2]) ctx = multiprocessing.get_context() - proc = ps.ProcessAdapter(10, 10, step, chain=3, seed=1, mp_ctx=ctx, - start={'a': 1., 'b_log__': 2.}, - step_method_pickled=None, pickle_backend='pickle') + proc = ps.ProcessAdapter( + 10, + 10, + step, + chain=3, + seed=1, + mp_ctx=ctx, + start={"a": 1.0, "b_log__": 2.0}, + step_method_pickled=None, + pickle_backend="pickle", + ) proc.start() while True: proc.write_next() @@ -124,16 +147,15 @@ def test_explicit_sample(): def test_iterator(): with pm.Model() as model: - a = pm.Normal('a', shape=1) - pm.HalfNormal('b') + a = pm.Normal("a", shape=1) + pm.HalfNormal("b") step1 = pm.NUTS([a]) step2 = pm.Metropolis([model.b_log__]) step = pm.CompoundStep([step1, step2]) - start = {'a': 1., 'b_log__': 2.} - sampler = ps.ParallelSampler(10, 10, 3, 2, [2, 3, 4], [start] * 3, - step, 0, False) + start = {"a": 1.0, "b_log__": 2.0} + sampler = ps.ParallelSampler(10, 10, 3, 2, [2, 3, 4], [start] * 3, step, 0, False) with sampler: for draw in sampler: pass diff --git a/pymc3/tests/test_pickling.py b/pymc3/tests/test_pickling.py index 2269286bab..7cb56c4a31 100644 --- a/pymc3/tests/test_pickling.py +++ b/pymc3/tests/test_pickling.py @@ -23,12 +23,12 @@ def setup_method(self): def test_model_roundtrip(self): m = self.model - for proto in range(pickle.HIGHEST_PROTOCOL+1): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): try: s = pickle.dumps(m, proto) pickle.loads(s) except Exception: raise AssertionError( - "Exception while trying roundtrip with pickle protocol %d:\n" % proto + - ''.join(traceback.format_exc()) + "Exception while trying roundtrip with pickle protocol %d:\n" % proto + + "".join(traceback.format_exc()) ) diff --git a/pymc3/tests/test_posdef_sym.py b/pymc3/tests/test_posdef_sym.py index 6ff5e1d4f4..a71c500649 100644 --- a/pymc3/tests/test_posdef_sym.py +++ 
diff --git a/pymc3/tests/test_posterior_predictive.py b/pymc3/tests/test_posterior_predictive.py
index f2f1571b80..7e812c7955 100644
--- a/pymc3/tests/test_posterior_predictive.py
+++ b/pymc3/tests/test_posterior_predictive.py
@@ -4,8 +4,9 @@
 from pymc3.backends.ndarray import point_list_to_multitrace

+
 def test_translate_point_list():
-     with pm.Model() as model:
+    with pm.Model() as model:
         mu = pm.Normal("mu", 0.0, 1.0)
         a = pm.Normal("a", mu=mu, sigma=1, observed=0.0)
         mt = point_list_to_multitrace([model.test_point], model)
@@ -13,24 +14,25 @@ def test_translate_point_list():
     assert {"mu"} == set(mt.varnames)
     assert len(mt) == 1

+
 def test_build_TraceDict():
-     with pm.Model() as model:
-         mu = pm.Normal("mu", 0.0, 1.0)
-         a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2]))
-         trace = pm.sample(chains=2, draws=500)
-         dict = _TraceDict(multi_trace=trace)
-         assert isinstance(dict, _TraceDict)
-         assert len(dict) == 1000
-         np.testing.assert_array_equal(trace['mu'], dict['mu'])
-         assert set(trace.varnames) == set(dict.varnames) == {"mu"}
+    with pm.Model() as model:
+        mu = pm.Normal("mu", 0.0, 1.0)
+        a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2]))
+        trace = pm.sample(chains=2, draws=500)
+        dict = _TraceDict(multi_trace=trace)
+        assert isinstance(dict, _TraceDict)
+        assert len(dict) == 1000
+        np.testing.assert_array_equal(trace["mu"], dict["mu"])
+        assert set(trace.varnames) == set(dict.varnames) == {"mu"}


 def test_build_TraceDict_point_list():
-     with pm.Model() as model:
-         mu = pm.Normal("mu", 0.0, 1.0)
-         a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2]))
-         dict = _TraceDict(point_list=[model.test_point])
-         assert set(dict.varnames) == {"mu"}
-         assert len(dict) == 1
-         assert len(dict["mu"]) == 1
-         assert dict["mu"][0] == 0.0
+    with pm.Model() as model:
+        mu = pm.Normal("mu", 0.0, 1.0)
+        a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2]))
+        dict = _TraceDict(point_list=[model.test_point])
+        assert set(dict.varnames) == {"mu"}
+        assert len(dict) == 1
+        assert len(dict["mu"]) == 1
+        assert dict["mu"][0] == 0.0

From 3e79b71496952a6824ee8053ce89a341fcd1a503 Mon Sep 17 00:00:00 2001
From: Yash1256
Date: Tue, 29 Sep 2020 22:59:53 +0530
Subject: [PATCH 2/5] Run `pre-commit run` and apply its fixes

---
 pymc3/model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc3/model.py b/pymc3/model.py
index 58cbab99d1..17f1edaf6c 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -715,7 +715,7 @@ def __call__(self, array, grad_out=None, extra_vars=None):
         if array.shape != (self.size,):
             raise ValueError(
-                "Invalid shape for array. Must be %s but is %s." % ((self.size,), array.shape)
+                "Invalid shape for array. Must be {} but is {}.".format((self.size,), array.shape)
             )

         if grad_out is None:
@@ -748,7 +748,7 @@ def array_to_dict(self, array):
             raise ValueError(f"Array should have shape ({self.size},) but has {array.shape}")
         if array.dtype != self.dtype:
             raise ValueError(
-                "Array has invalid dtype. Should be %s but is %s" % (self._dtype, self.dtype)
+                f"Array has invalid dtype. Should be {self._dtype} but is {self.dtype}"
             )
         point = {}
         for varmap in self._ordering.vmap:

From b09f747bb555ef69258d78ae377148b91a588de6 Mon Sep 17 00:00:00 2001
From: Yash1256
Date: Wed, 30 Sep 2020 02:17:59 +0530
Subject: [PATCH 3/5] Merge implicitly concatenated string literals

---
 pymc3/model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pymc3/model.py b/pymc3/model.py
index 17f1edaf6c..b80670a99e 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -524,7 +524,7 @@ def tree_contains(self, item):
     def __setitem__(self, key, value):
         raise NotImplementedError(
-            "Method is removed as we are not" " able to determine " "appropriate logic for it"
+            "Method is removed as we are not able to determine appropriate logic for it"
         )

     # Added this because mypy didn't like having __imul__ without __mul__
@@ -972,7 +972,7 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs):
         for var in grad_vars:
             if var.dtype not in continuous_types:
                 raise ValueError(
-                    "Can only compute the gradient of " "continuous types: %s" % var
+                    "Can only compute the gradient of continuous types: %s" % var
                 )

         if tempered:
@@ -1609,7 +1609,7 @@ def _get_scaling(total_size, shape, ndim):
             coef = tt.prod(coefs)
     else:
         raise TypeError(
-            "Unrecognized `total_size` type, expected " "int or list of ints, got %r" % total_size
+            "Unrecognized `total_size` type, expected int or list of ints, got %r" % total_size
         )
     return tt.as_tensor(floatX(coef))
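For context on the hunks above: adjacent Python string literals are concatenated at compile time, so the split-up messages removed here were already single strings, and merging them changes presentation only, not behavior. A minimal illustration (not taken from the patch itself):

# Implicit concatenation: adjacent literals fuse into one string at compile time.
msg = "Can only compute the gradient of " "continuous types: %s"
assert msg == "Can only compute the gradient of continuous types: %s"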

From 8849b962860c759ff82aad238f76fe84659e121b Mon Sep 17 00:00:00 2001
From: Yash Shukla <56587507+Yash1256@users.noreply.github.com>
Date: Tue, 6 Oct 2020 13:09:22 +0530
Subject: [PATCH 4/5] Update test_examples.py

As requested in review: added `# fmt: off` before and `# fmt: on` after
the disasters_data array so Black leaves its layout alone.
---
 pymc3/tests/test_examples.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py
index fbb5db0d8c..d86f488bd4 100644
--- a/pymc3/tests/test_examples.py
+++ b/pymc3/tests/test_examples.py
@@ -51,7 +51,10 @@ def get_city_data():
 class TestARM5_4(SeededTest):
     def build_model(self):
         data = pd.read_csv(
-            pm.get_data("wells.dat"), delimiter=" ", index_col="id", dtype={"switch": np.int8}
+            pm.get_data("wells.dat"),
+            delimiter=" ",
+            index_col="id",
+            dtype={"switch": np.int8},
         )
         data.dist /= 100
         data.educ /= 4
@@ -87,7 +90,9 @@ def build_model(self):
             groupsd = pm.Uniform("groupsd", 0, 10.0)
             sd = pm.Uniform("sd", 0, 10.0)
             floor_m = pm.Normal("floor_m", 0, 5.0 ** -2.0)
-            means = pm.Normal("means", groupmean, groupsd ** -2.0, shape=len(self.obs_means))
+            means = pm.Normal(
+                "means", groupmean, groupsd ** -2.0, shape=len(self.obs_means)
+            )
             pm.Normal("lr", floor * floor_m + means[group], sd ** -2.0, observed=lradon)
         return model

@@ -102,7 +107,8 @@ def too_slow(self):
         }
         with model:
             start = pm.find_MAP(
-                start=start, vars=[model["groupmean"], model["sd_interval__"], model["floor_m"]]
+                start=start,
+                vars=[model["groupmean"], model["sd_interval__"], model["floor_m"]],
             )
             step = pm.NUTS(model.vars, scaling=start)
             pm.sample(50, step=step, start=start)
@@ -124,9 +130,14 @@ def build_model(self):
             sd = pm.Uniform("sd", 0, 10.0)
             floor_m = pm.Normal("floor_m", 0, 5.0 ** -2.0)
             u_m = pm.Normal("u_m", 0, 5.0 ** -2)
-            means = pm.Normal("means", groupmean, groupsd ** -2.0, shape=len(self.obs_means))
+            means = pm.Normal(
+                "means", groupmean, groupsd ** -2.0, shape=len(self.obs_means)
+            )
             pm.Normal(
-                "lr", floor * floor_m + means[group] + ufull * u_m, sd ** -2.0, observed=lradon
+                "lr",
+                floor * floor_m + means[group] + ufull * u_m,
+                sd ** -2.0,
+                observed=lradon,
             )
         return model

@@ -152,6 +163,7 @@ def too_slow(self):
         pm.sample(50, step=step, start=start)


+# fmt: off
 def build_disaster_model(masked=False):
     disasters_data = np.array(
         [
             4,
             5,
             4,
             0,
             1,
             4,
             3,
             4,
             0,
             6,
             3,
             3,
             4,
             0,
             2,
             6,
             3,
             3,
             5,
             4,
             5,
             3,
             1,
             4,
             4,
             1,
             5,
             5,
             3,
             4,
             2,
             5,
             2,
             2,
             3,
             4,
             2,
             1,
             3,
             2,
             2,
             1,
             1,
             1,
             1,
             3,
             0,
             0,
             1,
             0,
             1,
             1,
             0,
             0,
             3,
             1,
             0,
             3,
             2,
             2,
             0,
             1,
             1,
             1,
             0,
             1,
             0,
             1,
             0,
             0,
             0,
             2,
             1,
             0,
             0,
             0,
             1,
             1,
             0,
             2,
             3,
             3,
             1,
             1,
             2,
             1,
             1,
             1,
             1,
             2,
             4,
             2,
             0,
             0,
             1,
             4,
             0,
             0,
             0,
             1,
             0,
             0,
             0,
             0,
             0,
             1,
             0,
             0,
             1,
             0,
             1,
         ]
     )
+    # fmt: on
     if masked:
         disasters_data[[23, 68]] = -1
         disasters_data = np.ma.masked_values(disasters_data, value=-1)
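The `# fmt: off` / `# fmt: on` pair used in the patch above is Black's standard escape hatch: formatting is suspended for everything between the two comments. A minimal sketch with a hypothetical array (not from the patch):

import numpy as np

# fmt: off
# Black leaves the hand-aligned rows below untouched; without the pragma
# it would reflow them to fit its own line-wrapping rules.
transition = np.array([[0.9, 0.1],
                       [0.2, 0.8]])
# fmt: on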

From e870a61b1c8caac3bb93f083d8ec298946d70642 Mon Sep 17 00:00:00 2001
From: Yash Shukla <56587507+Yash1256@users.noreply.github.com>
Date: Tue, 6 Oct 2020 13:54:37 +0530
Subject: [PATCH 5/5] Update test_examples.py

Compacted the disasters_data array inside the # fmt: off/# fmt: on
block so it no longer spans one line per element.
---
 pymc3/tests/test_examples.py | 149 +++++------------------------------
 1 file changed, 19 insertions(+), 130 deletions(-)

diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py
index d86f488bd4..409363f39e 100644
--- a/pymc3/tests/test_examples.py
+++ b/pymc3/tests/test_examples.py
@@ -163,123 +163,15 @@ def too_slow(self):
         pm.sample(50, step=step, start=start)


-# fmt: off
 def build_disaster_model(masked=False):
-    disasters_data = np.array(
-        [
-            4,
-            5,
-            4,
-            0,
-            1,
-            4,
-            3,
-            4,
-            0,
-            6,
-            3,
-            3,
-            4,
-            0,
-            2,
-            6,
-            3,
-            3,
-            5,
-            4,
-            5,
-            3,
-            1,
-            4,
-            4,
-            1,
-            5,
-            5,
-            3,
-            4,
-            2,
-            5,
-            2,
-            2,
-            3,
-            4,
-            2,
-            1,
-            3,
-            2,
-            2,
-            1,
-            1,
-            1,
-            1,
-            3,
-            0,
-            0,
-            1,
-            0,
-            1,
-            1,
-            0,
-            0,
-            3,
-            1,
-            0,
-            3,
-            2,
-            2,
-            0,
-            1,
-            1,
-            1,
-            0,
-            1,
-            0,
-            1,
-            0,
-            0,
-            0,
-            2,
-            1,
-            0,
-            0,
-            0,
-            1,
-            1,
-            0,
-            2,
-            3,
-            3,
-            1,
-            1,
-            2,
-            1,
-            1,
-            1,
-            1,
-            2,
-            4,
-            2,
-            0,
-            0,
-            1,
-            4,
-            0,
-            0,
-            0,
-            1,
-            0,
-            0,
-            0,
-            0,
-            0,
-            1,
-            0,
-            0,
-            1,
-            0,
-            1,
-        ]
-    )
+    # fmt: off
+    disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
+                               3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
+                               2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
+                               1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
+                               0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
+                               3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
+                               0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
     # fmt: on
     if masked:
         disasters_data[[23, 68]] = -1
         disasters_data = np.ma.masked_values(disasters_data, value=-1)
@@ -301,7 +193,9 @@ def build_disaster_model(masked=False):
     return model


-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
+@pytest.mark.xfail(
+    condition=(theano.config.floatX == "float32"), reason="Fails on float32"
+)
 class TestDisasterModel(SeededTest):
     # Time series of recorded coal mining disasters in the UK from 1851 to 1962
     def test_disaster_model(self):
@@ -331,7 +225,11 @@ def build_model(self):
         true_intercept = 1
         true_slope = 2
         self.x = np.linspace(0, 1, size)
-        self.y = true_intercept + self.x * true_slope + np.random.normal(scale=0.5, size=size)
+        self.y = (
+            true_intercept
+            + self.x * true_slope
+            + np.random.normal(scale=0.5, size=size)
+        )
         data = dict(x=self.x, y=self.y)
         with pm.Model() as model:
             pm.GLM.from_formula("y ~ x", data)
@@ -347,31 +245,21 @@ class TestLatentOccupancy(SeededTest):
     """
     From the PyMC example list
     latent_occupancy.py
-
    Simple model demonstrating the estimation of occupancy, using
    latent variables. Suppose a population of n sites, with some
    proportion pi being occupied. Each site is surveyed, yielding
    an array of counts, y:
-
    y = [3, 0, 0, 2, 1, 0, 1, 0, ..., ]
-
    This is a classic zero-inflated count problem, where more zeros
    appear in the data than would be predicted by a simple Poisson
    model. We have, in fact, a mixture of models; one, conditional
    on occupancy, with a poisson mean of theta, and another,
    conditional on absence, with mean zero. One way to tackle the
    problem is to model the latent state of 'occupancy' as a
    Bernoulli variable at each site, with some unknown probability:
-
    z_i ~ Bern(pi)
-
    These latent variables can then be used to generate an array of
    Poisson parameters:
-
    t_i = theta (if z_i=1) or 0 (if z_i=0)
-
    Hence, the likelihood is just:
-
    y_i = Poisson(t_i)
-
    (Note in this elementary model, we are ignoring the issue of
    imperfect detection.)
-
    Created by Chris Fonnesbeck on 2008-07-28.
    Copyright (c) 2008 University of Otago. All rights reserved.
    """
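The zero-inflated model sketched in the docstring above translates almost line for line into PyMC3; a hedged sketch follows (variable names and priors are illustrative assumptions, not the test's actual code):

import numpy as np
import pymc3 as pm

# First few counts from the docstring's example array y.
y = np.array([3, 0, 0, 2, 1, 0, 1, 0])

with pm.Model():
    pi = pm.Beta("pi", 1, 1)                   # site occupancy probability
    z = pm.Bernoulli("z", p=pi, shape=len(y))  # latent occupancy: z_i ~ Bern(pi)
    theta = pm.Uniform("theta", 0, 100)        # Poisson mean given occupancy
    # t_i = theta if z_i == 1 else 0, and y_i ~ Poisson(t_i)
    pm.Poisson("y_obs", mu=z * theta, observed=y)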
@@ -385,7 +273,9 @@ def setup_method(self):
         # True occupancy
         pi = 0.4
         # Simulate some data
-        self.y = ((np.random.random(n) < pi) * np.random.poisson(lam=theta, size=n)).astype("int16")
+        self.y = (
+            (np.random.random(n) < pi) * np.random.poisson(lam=theta, size=n)
+        ).astype("int16")

     def build_model(self):
         with pm.Model() as model:
@@ -421,7 +311,6 @@ class TestRSV(SeededTest):
     This model estimates the population prevalence of respiratory syncytial
     virus (RSV) among children in Amman, Jordan, based on 3 years of
     admissions diagnosed with RSV to Al Bashir hospital.
-
    To estimate this parameter from raw counts of diagnoses, we need to
    establish the population of 1-year-old children from which the diagnosed
    individuals were sampled. This involved correcting census data (national estimate of