1010from pymc .distributions .discrete import Bernoulli , Categorical , DiscreteUniform
1111from pymc .distributions .transforms import Chain
1212from pymc .logprob .abstract import _logprob
13- from pymc .logprob .basic import conditional_logp
13+ from pymc .logprob .basic import conditional_logp , logp
1414from pymc .logprob .transforms import IntervalTransform
1515from pymc .model import Model
1616from pymc .pytensorf import compile_pymc , constant_fold , inputvars
1717from pymc .util import _get_seeds_per_chain , dataset_to_point_list , treedict
18- from pytensor import Mode
18+ from pytensor import Mode , scan
1919from pytensor .compile import SharedVariable
2020from pytensor .compile .builders import OpFromGraph
21- from pytensor .graph import (
22- Constant ,
23- FunctionGraph ,
24- ancestors ,
25- clone_replace ,
26- vectorize_graph ,
27- )
21+ from pytensor .graph import Constant , FunctionGraph , ancestors , clone_replace
22+ from pytensor .graph .replace import vectorize_graph
2823from pytensor .scan import map as scan_map
2924from pytensor .tensor import TensorType , TensorVariable
3025from pytensor .tensor .elemwise import Elemwise
3328
3429__all__ = ["MarginalModel" ]
3530
31+ from pymc_experimental .distributions import DiscreteMarkovChain
32+
3633
3734class MarginalModel (Model ):
3835 """Subclass of PyMC Model that implements functionality for automatic
@@ -245,16 +242,25 @@ def marginalize(
245242 self [var ] if isinstance (var , str ) else var for var in rvs_to_marginalize
246243 ]
247244
248- supported_dists = (Bernoulli , Categorical , DiscreteUniform )
249245 for rv_to_marginalize in rvs_to_marginalize :
250246 if rv_to_marginalize not in self .free_RVs :
251247 raise ValueError (
252248 f"Marginalized RV { rv_to_marginalize } is not a free RV in the model"
253249 )
254- if not isinstance (rv_to_marginalize .owner .op , supported_dists ):
250+
251+ rv_op = rv_to_marginalize .owner .op
252+ if isinstance (rv_op , DiscreteMarkovChain ):
253+ if rv_op .n_lags > 1 :
254+ raise NotImplementedError (
255+ "Marginalization for DiscreteMarkovChain with n_lags > 1 is not supported"
256+ )
257+ if rv_to_marginalize .owner .inputs [0 ].type .ndim > 2 :
258+ raise NotImplementedError (
259+ "Marginalization for DiscreteMarkovChain with non-matrix transition probability is not supported"
260+ )
261+ elif not isinstance (rv_op , (Bernoulli , Categorical , DiscreteUniform )):
255262 raise NotImplementedError (
256- f"RV with distribution { rv_to_marginalize .owner .op } cannot be marginalized. "
257- f"Supported distribution include { supported_dists } "
263+ f"Marginalization of RV with distribution { rv_to_marginalize .owner .op } is not supported"
258264 )
259265
260266 if rv_to_marginalize .name in self .named_vars_to_dims :
@@ -490,6 +496,10 @@ class FiniteDiscreteMarginalRV(MarginalRV):
490496 """Base class for Finite Discrete Marginalized RVs"""
491497
492498
class DiscreteMarginalMarkovChainRV(MarginalRV):
    """Marginalized RV Op used when the marginalized variable is a DiscreteMarkovChain.

    The log-probability of this Op is registered separately through
    ``marginal_hmm_logp``.
    """
501+
502+
493503def static_shape_ancestors (vars ):
494504 """Identify ancestors Shape Ops of static shapes (therefore constant in a valid graph)."""
495505 return [
@@ -618,11 +628,17 @@ def replace_finite_discrete_marginal_subgraph(fgraph, rv_to_marginalize, all_rvs
618628 replace_inputs .update ({input_rv : input_rv .type () for input_rv in input_rvs })
619629 cloned_outputs = clone_replace (outputs , replace = replace_inputs )
620630
621- marginalization_op = FiniteDiscreteMarginalRV (
631+ if isinstance (rv_to_marginalize .owner .op , DiscreteMarkovChain ):
632+ marginalize_constructor = DiscreteMarginalMarkovChainRV
633+ else :
634+ marginalize_constructor = FiniteDiscreteMarginalRV
635+
636+ marginalization_op = marginalize_constructor (
622637 inputs = list (replace_inputs .values ()),
623638 outputs = cloned_outputs ,
624639 ndim_supp = ndim_supp ,
625640 )
641+
626642 marginalized_rvs = marginalization_op (* replace_inputs .keys ())
627643 fgraph .replace_all (tuple (zip (rvs_to_marginalize , marginalized_rvs )))
628644 return rvs_to_marginalize , marginalized_rvs
@@ -638,6 +654,9 @@ def get_domain_of_finite_discrete_rv(rv: TensorVariable) -> Tuple[int, ...]:
638654 elif isinstance (op , DiscreteUniform ):
639655 lower , upper = constant_fold (rv .owner .inputs [3 :])
640656 return tuple (range (lower , upper + 1 ))
657+ elif isinstance (op , DiscreteMarkovChain ):
658+ p = rv .owner .inputs [0 ]
659+ return tuple (range (pt .get_vector_length (p [- 1 ])))
641660
642661 raise NotImplementedError (f"Cannot compute domain for op { op } " )
643662
@@ -728,3 +747,69 @@ def logp_fn(marginalized_rv_const, *non_sequences):
728747
729748 # We have to add dummy logps for the remaining value variables, otherwise PyMC will raise
730749 return joint_logps , * (pt .constant (0 ),) * (len (values ) - 1 )
750+
751+
@_logprob.register(DiscreteMarginalMarkovChainRV)
def marginal_hmm_logp(op, values, *inputs, **kwargs):
    """Return the logp of the dependent variables with the discrete Markov chain summed out.

    The chain states are marginalized with the forward algorithm, carried out
    entirely in log space.

    Parameters
    ----------
    op
        The ``DiscreteMarginalMarkovChainRV`` Op whose inner graph holds the
        chain RV (first inner output) followed by the RVs that depend on it.
    values
        Value variables of the dependent RVs, in the same order as the
        dependent inner outputs.
    *inputs
        Outer inputs of the Op; they are substituted for the inner inputs.
    **kwargs
        Accepted for dispatch compatibility; not used here.

    Returns
    -------
    tuple
        The joint logp as first entry, followed by ``len(values) - 1``
        constant-zero placeholders.
    """
    # Express the Op's inner graph in terms of the actual outer inputs
    node = op.make_node(*inputs)
    inner_rvs = clone_replace(
        op.inner_outputs,
        replace=dict(zip(op.inner_inputs, node.inputs)),
    )

    chain_rv, *dependent_rvs = inner_rvs
    # The chain node is expected to carry exactly these four inputs; only the
    # transition probabilities and the initial distribution are needed below.
    transition_probs, _n_steps, init_dist, _rng = chain_rv.owner.inputs
    states = pt.arange(transition_probs.shape[-1], dtype="int32")

    # ---- Step 1: logp of the data ("emissions") under every possible state ----

    # The dependent RVs are re-pointed at a clone of the chain variable so that the
    # emission logp graph no longer depends on the chain (and hence on init_dist),
    # which PyMC would otherwise flag as a stray random variable in the logp graph.
    chain_placeholder = chain_rv.clone()
    detached_rvs = clone_replace(dependent_rvs, {chain_rv: chain_placeholder})
    emission_logps = conditional_logp(dict(zip(detached_rvs, values)))

    # Reduce and add up the batch dims beyond the chain dimension
    reduced_emission_logp = _add_reduce_batch_dependent_logps(
        chain_rv.type, emission_logps.values()
    )

    # Evaluate the emission logp once per state: a leading axis enumerates the
    # states, and each slice along it is the chain filled with that single state.
    chain_shape = constant_fold(tuple(chain_rv.shape))
    tiled_states = pt.full((*chain_shape, states.size), states)
    all_states_chain = pt.moveaxis(tiled_states, -1, 0)
    emission_logp_by_state = vectorize_graph(
        reduced_emission_logp, {chain_placeholder: all_states_chain}
    )

    # ---- Step 2: forward algorithm, in log space ----
    # alpha_t = p(y | s_t) * sum_{s_{t-1}}(p(s_t | s_{t-1}) * alpha_{t-1})

    # Prior log-probability of each state: evaluate the logp of every state of
    # the domain under the initial distribution.
    init_logp_of = pt.vectorize(lambda state: logp(init_dist, state), "()->()")
    init_logp_by_state = init_logp_of(all_states_chain[..., 0])
    log_alpha_init = init_logp_by_state + emission_logp_by_state[..., 0]

    def step_alpha(logp_emission, log_alpha, log_P):
        # Log-sum over the previous state, then add the current emission logp
        return logp_emission + pt.logsumexp(log_alpha[:, None] + log_P, axis=0)

    # Right-pad log(P) with broadcastable dims to line up with the chain's extra dims
    n_pad_dims = (len(chain_shape) - 1) - (transition_probs.type.ndim - 2)
    log_P = pt.shape_padright(pt.log(transition_probs), n_pad_dims)

    # Scan iterates over the leading axis, so the last (time) axis is moved to the
    # front; the first step was already consumed when building log_alpha_init.
    remaining_emission_logps = pt.moveaxis(emission_logp_by_state[..., 1:], -1, 0)
    log_alpha_seq, _ = scan(
        step_alpha,
        non_sequences=[log_P],
        outputs_info=[log_alpha_init],
        sequences=remaining_emission_logps,
    )

    # Final logp is the logsumexp over states of the last scan state
    joint_logp = pt.logsumexp(log_alpha_seq[-1], axis=0)

    # With several emission streams PyMC still expects one logp per value variable:
    # the first entry is the joint logp of everything, the rest are zero placeholders.
    placeholders = (pt.constant(0),) * (len(values) - 1)
    return (joint_logp, *placeholders)
0 commit comments