8 changes: 3 additions & 5 deletions pymc_marketing/mmm/delayed_saturated_mmm.py
@@ -7,10 +7,7 @@

from pymc_marketing.mmm.base import MMM
from pymc_marketing.mmm.preprocessing import MaxAbsScaleChannels, MixMaxScaleTarget
from pymc_marketing.mmm.transformers import (
geometric_adstock_vectorized,
logistic_saturation,
)
from pymc_marketing.mmm.transformers import geometric_adstock, logistic_saturation
from pymc_marketing.mmm.validating import ValidateControlColumns


@@ -82,11 +79,12 @@ def build_model(

channel_adstock = pm.Deterministic(
name="channel_adstock",
var=geometric_adstock_vectorized(
var=geometric_adstock(
x=channel_data_,
alpha=alpha,
l_max=adstock_max_lag,
normalize=True,
axis=0,
),
dims=("date", "channel"),
)
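
For context, a minimal sketch (not part of the diff) of how the updated geometric_adstock call behaves outside the model, assuming a (date, channel) spend array with one alpha per channel; the array sizes and values below are made up for illustration:

import numpy as np

from pymc_marketing.mmm.transformers import geometric_adstock

spend = np.random.rand(52, 3)        # 52 weekly observations for 3 channels
alpha = np.array([0.2, 0.5, 0.8])    # one decay rate per channel
adstocked = geometric_adstock(
    x=spend, alpha=alpha, l_max=8, normalize=True, axis=0
)
print(adstocked.eval().shape)        # (52, 3): one adstocked series per channel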
121 changes: 76 additions & 45 deletions pymc_marketing/mmm/transformers.py
@@ -1,7 +1,67 @@
import pytensor.tensor as pt

from pymc_marketing.mmm.utils import params_broadcast_shapes

def geometric_adstock(x, alpha: float = 0.0, l_max: int = 12, normalize: bool = False):

def batched_convolution(x, w, axis: int = 0):
"""Apply a 1D convolution in a vectorized way across multiple batch dimensions.

Parameters
----------
x :
The array to convolve.
w :
        The weights of the convolution. The last axis of ``w`` determines the number of steps
to use in the convolution.
axis : int
        The axis of ``x`` along which to apply the convolution.

Returns
-------
y :
        The result of convolving ``x`` with ``w`` along the desired axis. The shape of the
        result matches the shape of ``x`` up to broadcasting with ``w``. The convolved
        axis reflects the result of left-padding ``x`` with zeros before applying the
        convolution.
"""
orig_ndim = x.ndim
axis = axis if axis >= 0 else orig_ndim + axis
w = pt.as_tensor(w)
x = pt.moveaxis(x, axis, -1)
try:
l_max = w.shape[-1].eval()
except Exception:
l_max = None
(x_shape, w_shape), _ = params_broadcast_shapes(
param_shapes=[x.shape, w.shape],
ndims_params=[1, 1],
broadcastable_patterns=[
getattr(x, "broadcastable", tuple([s == 1 for s in x.shape])),
getattr(w, "broadcastable", tuple([s == 1 for s in w.shape])),
],
)
x = pt.broadcast_to(x, x_shape)
w = pt.broadcast_to(w, w_shape)
x_time = x.shape[-1]
shape = (*x.shape, w.shape[-1])
padded_x = pt.zeros(shape, dtype=x.dtype)
if l_max is not None:
for i in range(l_max):
padded_x = pt.set_subtensor(
padded_x[..., i:x_time, i], x[..., : x_time - i]
)
else: # pragma: no cover
raise NotImplementedError(
"At the moment, convolving with weight arrays that don't have a concrete shape "
"at compile time."
)
conv = pt.sum(padded_x * w[..., None, :], axis=-1)
return pt.moveaxis(conv, -1, axis + conv.ndim - orig_ndim)
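
As a rough usage sketch (not part of the diff, and assuming the function compiles as written above), batched_convolution convolves each channel's series with its own weight vector; the shapes and values are illustrative:

import numpy as np
import pytensor.tensor as pt

from pymc_marketing.mmm.transformers import batched_convolution

x = pt.as_tensor(np.random.rand(20, 2))              # (date, channel)
w = pt.as_tensor(
    np.array([[1.0, 0.5, 0.25], [1.0, 0.8, 0.64]])   # one 3-step kernel per channel
)
y = batched_convolution(x, w, axis=0)                # convolve along the date axis
print(y.eval().shape)                                # (20, 2), same as x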


def geometric_adstock(
x, alpha: float = 0.0, l_max: int = 12, normalize: bool = False, axis: int = 0
):
"""Geometric adstock transformation.

Adstock with geometric decay assumes advertising effect peaks at the same
@@ -31,29 +91,19 @@ def geometric_adstock(x, alpha: float = 0.0, l_max: int = 12, normalize: bool =
.. [1] Jin, Yuxue, et al. "Bayesian methods for media mix modeling
with carryover and shape effects." (2017).
"""
cycles = [pt.concatenate([pt.zeros(i), x[: x.shape[0] - i]]) for i in range(l_max)]
x_cycle = pt.stack(cycles)
w = pt.as_tensor_variable([pt.power(alpha, i) for i in range(l_max)])
w = w / pt.sum(w) if normalize else w
return pt.dot(w, x_cycle)


def geometric_adstock_vectorized(x, alpha, l_max: int = 12, normalize: bool = False):
"""Vectorized geometric adstock transformation."""
cycles = [
pt.concatenate(tensor_list=[pt.zeros(shape=x.shape)[:i], x[: x.shape[0] - i]])
for i in range(l_max)
]
x_cycle = pt.stack(cycles)
x_cycle = pt.transpose(x=x_cycle, axes=[1, 2, 0])
w = pt.as_tensor_variable([pt.power(alpha, i) for i in range(l_max)])
w = pt.transpose(w)[None, ...]
w = w / pt.sum(w, axis=2, keepdims=True) if normalize else w
return pt.sum(pt.mul(x_cycle, w), axis=2)

w = pt.power(pt.as_tensor(alpha)[..., None], pt.arange(l_max, dtype=x.dtype))
w = w / pt.sum(w, axis=-1, keepdims=True) if normalize else w
return batched_convolution(x, w, axis=axis)
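
The weight construction above reduces to a plain geometric sequence per alpha; a small NumPy illustration with arbitrary values:

import numpy as np

alpha, l_max = 0.5, 4
w = alpha ** np.arange(l_max)   # [1.0, 0.5, 0.25, 0.125]
w = w / w.sum()                 # what normalize=True does: weights sum to 1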


def delayed_adstock(
x, alpha: float = 0.0, theta: int = 0, l_max: int = 12, normalize: bool = False
x,
alpha: float = 0.0,
theta: int = 0,
l_max: int = 12,
normalize: bool = False,
axis: int = 0,
):
"""Delayed adstock transformation.

@@ -83,31 +133,12 @@ def delayed_adstock(
.. [1] Jin, Yuxue, et al. "Bayesian methods for media mix modeling
with carryover and shape effects." (2017).
"""
cycles = [pt.concatenate([pt.zeros(i), x[: x.shape[0] - i]]) for i in range(l_max)]
x_cycle = pt.stack(cycles)
w = pt.as_tensor_variable(
[pt.power(alpha, ((i - theta) ** 2)) for i in range(l_max)]
)
w = w / pt.sum(w) if normalize else w
return pt.dot(w, x_cycle)


def delayed_adstock_vectorized(
x, alpha, theta, l_max: int = 12, normalize: bool = False
):
"""Delayed adstock transformation."""
cycles = [
pt.concatenate(tensor_list=[pt.zeros(shape=x.shape)[:i], x[: x.shape[0] - i]])
for i in range(l_max)
]
x_cycle = pt.stack(cycles)
x_cycle = pt.transpose(x=x_cycle, axes=[1, 2, 0])
w = pt.as_tensor_variable(
[pt.power(alpha, ((i - theta) ** 2)) for i in range(l_max)]
w = pt.power(
pt.as_tensor(alpha)[..., None],
(pt.arange(l_max, dtype=x.dtype) - pt.as_tensor(theta)[..., None]) ** 2,
)
w = pt.transpose(w)[None, ...]
w = w / pt.sum(w, axis=2, keepdims=True) if normalize else w
return pt.sum(pt.mul(x_cycle, w), axis=2)
w = w / pt.sum(w, axis=-1, keepdims=True) if normalize else w
return batched_convolution(x, w, axis=axis)
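
Here the exponent (i - theta) ** 2 shifts the peak of the carryover to lag theta; a quick NumPy check with arbitrary values:

import numpy as np

alpha, theta, l_max = 0.5, 2, 6
w = alpha ** ((np.arange(l_max) - theta) ** 2)
print(np.argmax(w))             # 2: the effect peaks theta steps after the spend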


def logistic_saturation(x, lam: float = 0.5):
115 changes: 115 additions & 0 deletions pymc_marketing/mmm/utils.py
@@ -1,6 +1,13 @@
from itertools import zip_longest

import numpy as np
import numpy.typing as npt
import pandas as pd
from pytensor import tensor as pt
from pytensor.raise_op import Assert
from pytensor.tensor.basic import get_scalar_constant_value
from pytensor.tensor.exceptions import NotScalarConstantError
from pytensor.tensor.var import TensorConstant


def generate_fourier_modes(
@@ -33,3 +40,111 @@ def generate_fourier_modes(
for func in ("sin", "cos")
}
)


def params_broadcast_shapes(param_shapes, ndims_params, broadcastable_patterns):
"""Broadcast shape tuples except for some number of support dimensions.

Parameters
==========
param_shapes : list of ndarray or Variable
The shapes to broadcast.
ndims_params : list of int
The number of dimensions for each shape that are to be considered support dimensions that
need not broadcast together.
    broadcastable_patterns : list of tuple of bool
        The broadcastable pattern of each input shape.

Returns
=======
    bcast_shapes : list of tuple
        The broadcasted shapes of the supplied `param_shapes`.
    new_broadcastable_patterns : list of tuple of bool
        The broadcastable pattern of each broadcasted shape.
"""

rev_extra_dims = tuple()
rev_extra_broadcastable = tuple()
for param_ind, (ndim_param, param_shape, broadcastable) in enumerate(
zip(ndims_params, param_shapes, broadcastable_patterns)
):
# Try to get concrete shapes from the start
if isinstance(param_shape, TensorConstant):
param_shape = param_shape.value
# We need this in order to use `len`
param_shape = tuple(param_shape)
extras = tuple(param_shape[: (len(param_shape) - ndim_param)])
extra_broadcastable = tuple(broadcastable[: (len(param_shape) - ndim_param)])

def bcast(x, y, bcast_x, bcast_y, i):
try:
concrete_x = get_scalar_constant_value(x)
except NotScalarConstantError:
concrete_x = None
try:
concrete_y = get_scalar_constant_value(y)
except NotScalarConstantError:
concrete_y = None
if not bcast_x and not bcast_y:
if concrete_x is not None and concrete_y is not None:
if (
(concrete_x == 1 and not bcast_x)
or (concrete_y == 1 and not bcast_y)
) and concrete_x != concrete_y:
raise ValueError(
f"Shape along axis {i} in the {param_ind} supplied param_shape was "
"tagged as not broadcastable and it was not exactly equal to the other "
"supplied param_shapes."
)
elif concrete_x != concrete_y:
raise ValueError(
f"Cannot broadcast shapes {concrete_x} and {concrete_y} together"
)
return (x, False)
else:
assert_op = Assert(
f"Shape along axis {i} in the {param_ind} supplied param_shape was tagged as "
f"not broadcastable and it was not exactly equal to the other supplied "
"param_shapes."
)
return (assert_op(x, pt.eq(x, y)), False)
elif bcast_x:
return (y, bcast_y)
elif bcast_y:
return (x, bcast_x)
else:
return (x, bcast_x)

temp = [
bcast(a, b, broadcastable_a, broadcastable_b, i)
for i, (b, a, (broadcastable_b, broadcastable_a)) in enumerate(
zip_longest(
reversed(extras),
rev_extra_dims,
zip_longest(
reversed(extra_broadcastable),
rev_extra_broadcastable,
fillvalue=True,
),
fillvalue=1,
)
)
]
if len(temp) > 0:
rev_extra_dims, rev_extra_broadcastable = list(zip(*temp))

extra_dims = tuple(reversed(rev_extra_dims))
extra_broadcastable = tuple(reversed(rev_extra_broadcastable))

bcast_shapes = [
(extra_dims + tuple(getattr(param_shape, "value", param_shape))[-ndim_param:])
if ndim_param > 0
else extra_dims
for ndim_param, param_shape in zip(ndims_params, param_shapes)
]
new_broadcastable_patterns = [
(extra_broadcastable + broadcastable[-ndim_param:])
if ndim_param > 0
else extra_broadcastable
for ndim_param, broadcastable in zip(ndims_params, broadcastable_patterns)
]

return list(bcast_shapes), list(new_broadcastable_patterns)
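
A rough illustration (not part of the diff) of what the helper computes, assuming two tensors whose last axis is treated as a support dimension; the shapes are made up:

import pytensor.tensor as pt

from pymc_marketing.mmm.utils import params_broadcast_shapes

x = pt.zeros((3, 1, 20))
w = pt.zeros((5, 7))
(x_shape, w_shape), _ = params_broadcast_shapes(
    param_shapes=[x.shape, w.shape],
    ndims_params=[1, 1],
    broadcastable_patterns=[x.broadcastable, w.broadcastable],
)
# The batch dimensions broadcast together while the support dimensions stay put:
# x_shape evaluates to roughly (3, 5, 20) and w_shape to (3, 5, 7)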
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -6,7 +6,7 @@ addopts = [
"-v",
"--strict-markers",
"--strict-config",
"--cov=mmm",
"--cov=pymmmc",
"--cov=pymc_marketing",
"--cov-report=term-missing",
]
testpaths = "tests"