From 48dab2c703db1f971cf3eb2cfb1bca732741ddf6 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Fri, 11 Mar 2022 18:25:22 +0100
Subject: [PATCH 1/8] Allow Constant to be integer or float

---
 pymc/distributions/discrete.py           |  16 +++-
 pymc/tests/test_distributions.py         |  20 ++++-
 pymc/tests/test_distributions_moments.py |  10 ++-
 pymc/tests/test_distributions_random.py  | 110 ++---------------------
 4 files changed, 44 insertions(+), 112 deletions(-)

diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py
index c1dd5e700a..5cd64a0970 100644
--- a/pymc/distributions/discrete.py
+++ b/pymc/distributions/discrete.py
@@ -46,6 +46,7 @@
 from pymc.distributions.logprob import logcdf, logp
 from pymc.distributions.shape_utils import rv_size_is_none
 from pymc.math import sigmoid
+from pymc.vartypes import continuous_types
 
 __all__ = [
     "Binomial",
@@ -1315,9 +1316,12 @@ class ConstantRV(RandomVariable):
     name = "constant"
     ndim_supp = 0
     ndims_params = [0]
-    dtype = "floatX"  # Should be treated as a discrete variable!
     _print_name = ("Constant", "\\operatorname{Constant}")
 
+    def make_node(self, rng, size, dtype, c):
+        c = at.as_tensor_variable(c)
+        return super().make_node(rng, size, c.dtype, c)
+
     @classmethod
     def rng_fn(cls, rng, c, size=None):
         if size is None:
@@ -1334,15 +1338,19 @@ class Constant(Discrete):
 
     Parameters
     ----------
-    value: float or int
-        Constant parameter.
+    c: float or int
+        Constant parameter. The dtype of `c` determines the dtype of the distribution.
+        This can affect which sampler is assigned to Constant variables, or variables
+        that use Constant, such as Mixtures.
     """
 
     rv_op = constant
 
     @classmethod
     def dist(cls, c, *args, **kwargs):
-        c = at.as_tensor_variable(floatX(c))
+        c = at.as_tensor_variable(c)
+        if c.dtype in continuous_types:
+            c = floatX(c)
         return super().dist([c], **kwargs)
 
     def get_moment(rv, size, c):
diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py
index 8f7488aa0a..b62535a9cd 100644
--- a/pymc/tests/test_distributions.py
+++ b/pymc/tests/test_distributions.py
@@ -130,7 +130,7 @@ def polyagamma_cdf(*args, **kwargs):
 from pymc.math import kronecker
 from pymc.model import Deterministic, Model, Point, Potential
 from pymc.tests.helpers import select_by_precision
-from pymc.vartypes import continuous_types
+from pymc.vartypes import continuous_types, discrete_types
 
 
 def get_lkj_cases():
@@ -934,7 +934,9 @@ def check_selfconsistency_discrete_logcdf(
         Check that logcdf of discrete distributions matches sum of logps up to value
         """
         # This test only works for scalar random variables
-        assert distribution.rv_op.ndim_supp == 0
+        rv_op = getattr(distribution, "rv_op", None)
+        if rv_op:
+            assert rv_op.ndim_supp == 0
 
         domains = paramdomains.copy()
         domains["value"] = domain
@@ -3416,3 +3418,17 @@ def test_sd_dist_automatically_resized(self, sd_dist):
         assert resized_sd_dist.eval().shape == (10, 3)
         # LKJCov has support shape `(n * (n+1)) // 2`
         assert x.eval().shape == (10, 6)
+
+
+@pytest.mark.parametrize(
+    "dist, non_psi_args",
+    [
+        (pm.ZeroInflatedPoisson.dist, (2,)),
+        (pm.ZeroInflatedBinomial.dist, (2, 0.5)),
+        (pm.ZeroInflatedNegativeBinomial.dist, (2, 2)),
+    ],
+)
+def test_zero_inflated_dists_dtype_and_broadcast(dist, non_psi_args):
+    x = dist([0.5, 0.5, 0.5], *non_psi_args)
+    assert x.dtype in discrete_types
+    assert x.eval().shape == (3,)
diff --git a/pymc/tests/test_distributions_moments.py b/pymc/tests/test_distributions_moments.py
index 6b3aa60fe4..db1af97b00 100644
--- a/pymc/tests/test_distributions_moments.py
+++ b/pymc/tests/test_distributions_moments.py
@@ -626,7 +626,7 @@ def test_constant_moment(c, size, expected):
 @pytest.mark.parametrize(
     "psi, theta, size, expected",
     [
-        (0.9, 3.0, None, 2),
+        (0.9, 3.0, None, 3),
         (0.8, 2.9, 5, np.full(5, 2)),
         (0.2, np.arange(1, 5) * 5, None, np.arange(1, 5)),
         (0.2, np.arange(1, 5) * 5, (2, 4), np.full((2, 4), np.arange(1, 5))),
@@ -1335,7 +1335,13 @@ def test_multinomial_moment(p, n, size, expected):
     [
         (0.2, 10, 3, None, 2),
         (0.2, 10, 4, 5, np.full(5, 2)),
-        (0.4, np.arange(1, 5), np.arange(2, 6), None, np.array([0, 0, 1, 1])),
+        (
+            0.4,
+            np.arange(1, 5),
+            np.arange(2, 6),
+            None,
+            np.array([0, 1, 1, 2] if aesara.config.floatX == "float64" else [0, 0, 1, 1]),
+        ),
         (
             np.linspace(0.2, 0.6, 3),
             np.arange(1, 10, 4),
diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py
index f01be90541..ce140b6b5b 100644
--- a/pymc/tests/test_distributions_random.py
+++ b/pymc/tests/test_distributions_random.py
@@ -1571,112 +1571,14 @@ def constant_rng_fn(self, size, c):
         "check_pymc_params_match_rv_op",
         "check_pymc_draws_match_reference",
         "check_rv_size",
+        "check_dtype",
     ]
 
-
-class TestZeroInflatedPoisson(BaseTestDistributionRandom):
-    def zero_inflated_poisson_rng_fn(self, size, psi, theta, poisson_rng_fct, random_rng_fct):
-        return poisson_rng_fct(theta, size=size) * (random_rng_fct(size=size) < psi)
-
-    def seeded_zero_inflated_poisson_rng_fn(self):
-        poisson_rng_fct = functools.partial(
-            getattr(np.random.RandomState, "poisson"), self.get_random_state()
-        )
-
-        random_rng_fct = functools.partial(
-            getattr(np.random.RandomState, "random"), self.get_random_state()
-        )
-
-        return functools.partial(
-            self.zero_inflated_poisson_rng_fn,
-            poisson_rng_fct=poisson_rng_fct,
-            random_rng_fct=random_rng_fct,
-        )
-
-    pymc_dist = pm.ZeroInflatedPoisson
-    pymc_dist_params = {"psi": 0.9, "theta": 4.0}
-    expected_rv_op_params = {"psi": 0.9, "theta": 4.0}
-    reference_dist_params = {"psi": 0.9, "theta": 4.0}
-    reference_dist = seeded_zero_inflated_poisson_rng_fn
-    checks_to_run = [
-        "check_pymc_params_match_rv_op",
-        "check_pymc_draws_match_reference",
-        "check_rv_size",
-    ]
-
-
-class TestZeroInflatedBinomial(BaseTestDistributionRandom):
-    def zero_inflated_binomial_rng_fn(self, size, psi, n, p, binomial_rng_fct, random_rng_fct):
-        return binomial_rng_fct(n, p, size=size) * (random_rng_fct(size=size) < psi)
-
-    def seeded_zero_inflated_binomial_rng_fn(self):
-        binomial_rng_fct = functools.partial(
-            getattr(np.random.RandomState, "binomial"), self.get_random_state()
-        )
-
-        random_rng_fct = functools.partial(
-            getattr(np.random.RandomState, "random"), self.get_random_state()
-        )
-
-        return functools.partial(
-            self.zero_inflated_binomial_rng_fn,
-            binomial_rng_fct=binomial_rng_fct,
-            random_rng_fct=random_rng_fct,
-        )
-
-    pymc_dist = pm.ZeroInflatedBinomial
-    pymc_dist_params = {"psi": 0.9, "n": 12, "p": 0.7}
-    expected_rv_op_params = {"psi": 0.9, "n": 12, "p": 0.7}
-    reference_dist_params = {"psi": 0.9, "n": 12, "p": 0.7}
-    reference_dist = seeded_zero_inflated_binomial_rng_fn
-    checks_to_run = [
-        "check_pymc_params_match_rv_op",
-        "check_pymc_draws_match_reference",
-        "check_rv_size",
-    ]
-
-
-class TestZeroInflatedNegativeBinomialMuSigma(BaseTestDistributionRandom):
-    def zero_inflated_negbinomial_rng_fn(
-        self, size, psi, n, p, negbinomial_rng_fct, random_rng_fct
-    ):
-        return negbinomial_rng_fct(n, p, size=size) * (random_rng_fct(size=size) < psi)
-
-    def seeded_zero_inflated_negbinomial_rng_fn(self):
-        negbinomial_rng_fct = functools.partial(
-            getattr(np.random.RandomState, "negative_binomial"), self.get_random_state()
-        )
-
-        random_rng_fct = functools.partial(
-            getattr(np.random.RandomState, "random"), self.get_random_state()
-        )
-
-        return functools.partial(
-            self.zero_inflated_negbinomial_rng_fn,
-            negbinomial_rng_fct=negbinomial_rng_fct,
-            random_rng_fct=random_rng_fct,
-        )
-
-    n, p = pm.NegativeBinomial.get_n_p(mu=3, alpha=5)
-
-    pymc_dist = pm.ZeroInflatedNegativeBinomial
-    pymc_dist_params = {"psi": 0.9, "mu": 3, "alpha": 5}
-    expected_rv_op_params = {"psi": 0.9, "n": n, "p": p}
-    reference_dist_params = {"psi": 0.9, "n": n, "p": p}
-    reference_dist = seeded_zero_inflated_negbinomial_rng_fn
-    checks_to_run = [
-        "check_pymc_params_match_rv_op",
-        "check_pymc_draws_match_reference",
-        "check_rv_size",
-    ]
-
-
-class TestZeroInflatedNegativeBinomial(BaseTestDistributionRandom):
-    pymc_dist = pm.ZeroInflatedNegativeBinomial
-    pymc_dist_params = {"psi": 0.9, "n": 12, "p": 0.7}
-    expected_rv_op_params = {"psi": 0.9, "n": 12, "p": 0.7}
-    reference_dist_params = {"psi": 0.9, "n": 12, "p": 0.7}
-    checks_to_run = ["check_pymc_params_match_rv_op"]
+    def check_dtype(self):
+        assert pm.Constant.dist(2**4).dtype == "int8"
+        assert pm.Constant.dist(2**16).dtype == "int32"
+        assert pm.Constant.dist(2**32).dtype == "int64"
+        assert pm.Constant.dist(2.0).dtype == aesara.config.floatX
 
 
 class TestOrderedLogistic(BaseTestDistributionRandom):

From 9b6c01957bf41127fa626a9b492592e5b9901d1d Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Fri, 11 Mar 2022 16:54:46 +0100
Subject: [PATCH 2/8] Implement Constant logcdf

---
 pymc/distributions/discrete.py   | 7 +++++++
 pymc/tests/test_distributions.py | 1 +
 2 files changed, 8 insertions(+)

diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py
index 5cd64a0970..fb5173efb7 100644
--- a/pymc/distributions/discrete.py
+++ b/pymc/distributions/discrete.py
@@ -1378,6 +1378,13 @@ def logp(value, c):
             -np.inf,
         )
 
+    def logcdf(value, c):
+        return at.switch(
+            at.lt(value, c),
+            -np.inf,
+            0,
+        )
+
 
 class ZeroInflatedPoissonRV(RandomVariable):
     name = "zero_inflated_poisson"
diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py
index b62535a9cd..2e8e599f1e 100644
--- a/pymc/tests/test_distributions.py
+++ b/pymc/tests/test_distributions.py
@@ -1744,6 +1744,7 @@ def test_poisson(self):
 
     def test_constantdist(self):
         self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value))
+        self.check_logcdf(Constant, I, {"c": I}, lambda value, c: np.log(value >= c))
 
     def test_zeroinflatedpoisson(self):
         def logp_fn(value, psi, theta):

From 8a2d2bf3d3a38cfd7a369a84876639bc765c79b6 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Fri, 11 Mar 2022 18:45:18 +0100
Subject: [PATCH 3/8] Test Mixture dtype

---
 pymc/tests/test_mixture.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/pymc/tests/test_mixture.py b/pymc/tests/test_mixture.py
index 51754aa788..5d5c77ddca 100644
--- a/pymc/tests/test_mixture.py
+++ b/pymc/tests/test_mixture.py
@@ -658,6 +658,25 @@ def test_iterable_single_component_warning(self):
         with pytest.warns(UserWarning, match="Single component will be treated as a mixture"):
             Mixture.dist(w=[0.5, 0.5], comp_dists=[Normal.dist(size=2)])
 
+    def test_mixture_dtype(self):
+        mix_dtype = Mixture.dist(
+            w=[0.5, 0.5],
+            comp_dists=[
+                Multinomial.dist(n=5, p=[0.5, 0.5]),
+                Multinomial.dist(n=5, p=[0.5, 0.5]),
+            ],
+        ).dtype
+        assert mix_dtype == "int64"
+
+        mix_dtype = Mixture.dist(
+            w=[0.5, 0.5],
+            comp_dists=[
+                Dirichlet.dist(a=[0.5, 0.5]),
+                Dirichlet.dist(a=[0.5, 0.5]),
+            ],
+        ).dtype
+        assert mix_dtype == aesara.config.floatX
+
 
 class TestNormalMixture(SeededTest):
     def test_normal_mixture_sampling(self):

From b36ccee3ace82326ce5fdc66b173bbe285ff5a8f Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Fri, 11 Mar 2022 16:43:54 +0100
Subject: [PATCH 4/8] Implement Mixture logcdf

---
 pymc/distributions/mixture.py | 43 +++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py
index 0e06fef5f8..ab41650407 100644
--- a/pymc/distributions/mixture.py
+++ b/pymc/distributions/mixture.py
@@ -18,7 +18,7 @@
 import numpy as np
 
 from aeppl.abstract import MeasurableVariable, _get_measurable_outputs
-from aeppl.logprob import _logprob
+from aeppl.logprob import _logcdf, _logprob
 from aesara.compile.builders import OpFromGraph
 from aesara.tensor import TensorVariable
 from aesara.tensor.random.op import RandomVariable
@@ -33,7 +33,7 @@
     _get_moment,
     get_moment,
 )
-from pymc.distributions.logprob import logp
+from pymc.distributions.logprob import logcdf, logp
 from pymc.distributions.shape_utils import to_tuple
 from pymc.util import check_dist_not_registered
 
@@ -275,7 +275,8 @@ def rv_op(cls, weights, *components, size=None, rngs=None):
 
         # Index components and squeeze mixture dimension
         mix_out_ = at.take_along_axis(stacked_components_, mix_indexes_padded_, axis=mix_axis)
-        # There is a Aeasara bug in squeeze with negative axis
+        # There is a Aesara bug in squeeze with negative axis
+        # https://github.com/aesara-devs/aesara/issues/830
         # this is equivalent to np.squeeze(mix_out_, axis=mix_axis)
         mix_out_ = at.squeeze(mix_out_, axis=mix_out_.ndim + mix_axis)
 
@@ -389,7 +390,8 @@ def marginal_mixture_logprob(op, values, rng, weights, *components, **kwargs):
     mix_logp = at.logsumexp(at.log(weights) + components_logp, axis=-1)
 
     # Squeeze stack dimension
-    # There is a Aeasara bug in squeeze with negative axis
+    # There is a Aesara bug in squeeze with negative axis
+    # https://github.com/aesara-devs/aesara/issues/830
     # mix_logp = at.squeeze(mix_logp, axis=-1)
     mix_logp = at.squeeze(mix_logp, axis=mix_logp.ndim - 1)
 
@@ -404,6 +406,39 @@ def marginal_mixture_logprob(op, values, rng, weights, *components, **kwargs):
     return mix_logp
 
 
+@_logcdf.register(MarginalMixtureRV)
+def marginal_mixture_logcdf(op, value, rng, weights, *components, **kwargs):
+
+    # single component
+    if len(components) == 1:
+        # Need to broadcast value across mixture axis
+        mix_axis = -components[0].owner.op.ndim_supp - 1
+        components_logcdf = logcdf(components[0], at.expand_dims(value, mix_axis))
+    else:
+        components_logcdf = at.stack(
+            [logcdf(component, value) for component in components],
+            axis=-1,
+        )
+
+    mix_logcdf = at.logsumexp(at.log(weights) + components_logcdf, axis=-1)
+
+    # Squeeze stack dimension
+    # There is a Aesara bug in squeeze with negative axis
+    # https://github.com/aesara-devs/aesara/issues/830
+    # mix_logp = at.squeeze(mix_logp, axis=-1)
+    mix_logcdf = at.squeeze(mix_logcdf, axis=mix_logcdf.ndim - 1)
+
+    mix_logcdf = check_parameters(
+        mix_logcdf,
+        0 <= weights,
+        weights <= 1,
+        at.isclose(at.sum(weights, axis=-1), 1),
+        msg="0 <= weights <= 1, sum(weights) == 1",
+    )
+
+    return mix_logcdf
+
+
 @_get_moment.register(MarginalMixtureRV)
 def get_moment_marginal_mixture(op, rv, rng, weights, *components):
     ndim_supp = components[0].owner.op.ndim_supp

From 49fb695eedd83f570460cdc57bd00674afbb8ca1 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Fri, 11 Mar 2022 17:52:50 +0100
Subject: [PATCH 5/8] Allow broadcasting of mixture components

---
 pymc/distributions/mixture.py | 20 +++++++++-----------
 pymc/tests/test_mixture.py    | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py
index ab41650407..a02bbb3e0c 100644
--- a/pymc/distributions/mixture.py
+++ b/pymc/distributions/mixture.py
@@ -176,7 +176,6 @@ def dist(cls, w, comp_dists, **kwargs):
             )
 
         # Check that components are not associated with a registered variable in the model
-        components_ndim = set()
         components_ndim_supp = set()
         for dist in comp_dists:
             # TODO: Allow these to not be a RandomVariable as long as we can call `ndim_supp` on them
@@ -188,14 +187,8 @@ def dist(cls, w, comp_dists, **kwargs):
                     f"Component dist must be a distribution created via the `.dist()` API, got {type(dist)}"
                 )
             check_dist_not_registered(dist)
-            components_ndim.add(dist.ndim)
             components_ndim_supp.add(dist.owner.op.ndim_supp)
 
-        if len(components_ndim) > 1:
-            raise ValueError(
-                f"Mixture components must all have the same dimensionality, got {components_ndim}"
-            )
-
         if len(components_ndim_supp) > 1:
             raise ValueError(
                 f"Mixture components must all have the same support dimensionality, got {components_ndim_supp}"
@@ -214,13 +207,18 @@ def rv_op(cls, weights, *components, size=None, rngs=None):
             # Create new rng for the mix_indexes internal RV
             mix_indexes_rng = aesara.shared(np.random.default_rng())
 
+        single_component = len(components) == 1
+        ndim_supp = components[0].owner.op.ndim_supp
+
         if size is not None:
             components = cls._resize_components(size, *components)
+        elif not single_component:
+            # We might need to broadcast components when size is not specified
+            shape = tuple(at.broadcast_shape(*components))
+            size = shape[: len(shape) - ndim_supp]
+            components = cls._resize_components(size, *components)
 
-        single_component = len(components) == 1
-
-        # Extract support and replication ndims from components and weights
-        ndim_supp = components[0].owner.op.ndim_supp
+        # Extract replication ndims from components and weights
         ndim_batch = components[0].ndim - ndim_supp
         if single_component:
             # One dimension is taken by the mixture axis in the single component case
diff --git a/pymc/tests/test_mixture.py b/pymc/tests/test_mixture.py
index 5d5c77ddca..3c061d5ef4 100644
--- a/pymc/tests/test_mixture.py
+++ b/pymc/tests/test_mixture.py
@@ -20,6 +20,7 @@
 import scipy.stats as st
 
 from aesara import tensor as at
+from aesara.tensor.random.op import RandomVariable
 from numpy.testing import assert_allclose
 from scipy.special import logsumexp
 
@@ -677,6 +678,38 @@ def test_mixture_dtype(self):
         ).dtype
         assert mix_dtype == aesara.config.floatX
 
+    @pytest.mark.parametrize(
+        "comp_dists, expected_shape",
+        [
+            (
+                [
+                    Normal.dist([[0, 0, 0], [0, 0, 0]]),
+                    Normal.dist([0, 0, 0]),
+                    Normal.dist([0]),
+                ],
+                (2, 3),
+            ),
+            (
+                [
+                    Dirichlet.dist([[1, 1, 1], [1, 1, 1]]),
+                    Dirichlet.dist([1, 1, 1]),
+                ],
+                (2, 3),
+            ),
+        ],
+    )
+    def test_broadcast_components(self, comp_dists, expected_shape):
+        n_dists = len(comp_dists)
+        mix = Mixture.dist(w=np.ones(n_dists) / n_dists, comp_dists=comp_dists)
+        mix_eval = mix.eval()
+        assert tuple(mix_eval.shape) == expected_shape
+        assert np.unique(mix_eval).size == mix.eval().size
+        for comp_dist in mix.owner.inputs[2:]:
+            # We check that the input is a "pure" RandomVariable and not a broadcast
+            # operation. This confirms that all draws will be unique
+            assert isinstance(comp_dist.owner.op, RandomVariable)
+            assert tuple(comp_dist.shape.eval()) == expected_shape
+
 
 class TestNormalMixture(SeededTest):
     def test_normal_mixture_sampling(self):

From 67b3d37b7d8dc037450cc9331f5f825231cf876a Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Sun, 13 Mar 2022 15:49:21 +0100
Subject: [PATCH 6/8] Round moment of discrete mixtures

---
 pymc/distributions/mixture.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py
index a02bbb3e0c..ad07fee965 100644
--- a/pymc/distributions/mixture.py
+++ b/pymc/distributions/mixture.py
@@ -36,6 +36,7 @@
 from pymc.distributions.logprob import logcdf, logp
 from pymc.distributions.shape_utils import to_tuple
 from pymc.util import check_dist_not_registered
+from pymc.vartypes import discrete_types
 
 __all__ = ["Mixture", "NormalMixture"]
 
@@ -452,7 +453,10 @@ def get_moment_marginal_mixture(op, rv, rng, weights, *components):
             axis=mix_axis,
         )
 
-    return at.sum(weights * moment_components, axis=mix_axis)
+    moment = at.sum(weights * moment_components, axis=mix_axis)
+    if components[0].dtype in discrete_types:
+        moment = at.round(moment)
+    return moment
 
 
 class NormalMixture:

From d55ca391068659856bad6ec6f49e984dab9b0239 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 14 Mar 2022 14:23:30 +0100
Subject: [PATCH 7/8] Replace ZeroInflated distributions with Mixtures

---
 pymc/distributions/discrete.py | 332 +++++----------------------------
 1 file changed, 48 insertions(+), 284 deletions(-)

diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py
index fb5173efb7..4a7c14423a 100644
--- a/pymc/distributions/discrete.py
+++ b/pymc/distributions/discrete.py
@@ -43,7 +43,8 @@
     normal_lcdf,
 )
 from pymc.distributions.distribution import Discrete
-from pymc.distributions.logprob import logcdf, logp
+from pymc.distributions.logprob import logp
+from pymc.distributions.mixture import Mixture
 from pymc.distributions.shape_utils import rv_size_is_none
 from pymc.math import sigmoid
 from pymc.vartypes import continuous_types
@@ -1386,22 +1387,24 @@ def logcdf(value, c):
         )
 
 
-class ZeroInflatedPoissonRV(RandomVariable):
-    name = "zero_inflated_poisson"
-    ndim_supp = 0
-    ndims_params = [0, 0]
-    dtype = "int64"
-    _print_name = ("ZeroInflatedPois", "\\operatorname{ZeroInflatedPois}")
-
-    @classmethod
-    def rng_fn(cls, rng, psi, lam, size):
-        return rng.poisson(lam, size=size) * (rng.random(size=size) < psi)
-
+def _zero_inflated_mixture(*, name, nonzero_p, nonzero_dist, **kwargs):
+    """Helper function to create a zero-inflated mixture
 
-zero_inflated_poisson = ZeroInflatedPoissonRV()
-
-
-class ZeroInflatedPoisson(Discrete):
+    If name is `None`, this function returns an unregistered variable
+    """
+    nonzero_p = at.as_tensor_variable(floatX(nonzero_p))
+    weights = at.stack([1 - nonzero_p, nonzero_p], axis=-1)
+    comp_dists = [
+        Constant.dist(0),
+        nonzero_dist,
+    ]
+    if name is not None:
+        return Mixture(name, weights, comp_dists, **kwargs)
+    else:
+        return Mixture.dist(weights, comp_dists, **kwargs)
+
+
+class ZeroInflatedPoisson:
     R"""
     Zero-inflated Poisson log-likelihood.
 
@@ -1452,97 +1455,19 @@ class ZeroInflatedPoisson(Discrete):
         (theta >= 0).
     """
 
-    rv_op = zero_inflated_poisson
-
-    @classmethod
-    def dist(cls, psi, theta, *args, **kwargs):
-        psi = at.as_tensor_variable(floatX(psi))
-        theta = at.as_tensor_variable(floatX(theta))
-        return super().dist([psi, theta], *args, **kwargs)
-
-    def get_moment(rv, size, psi, theta):
-        mean = at.floor(psi * theta)
-        if not rv_size_is_none(size):
-            mean = at.full(size, mean)
-        return mean
-
-    def logp(value, psi, theta):
-        r"""
-        Calculate log-probability of ZeroInflatedPoisson distribution at specified value.
-
-        Parameters
-        ----------
-        value: numeric
-            Value(s) for which log-probability is calculated. If the log probabilities for multiple
-            values are desired the values must be provided in a numpy array or Aesara tensor
-
-        Returns
-        -------
-        TensorVariable
-        """
-
-        res = at.switch(
-            at.gt(value, 0),
-            at.log(psi) + logp(Poisson.dist(mu=theta), value),
-            at.logaddexp(at.log1p(-psi), at.log(psi) - theta),
-        )
-
-        res = at.switch(at.lt(value, 0), -np.inf, res)
-
-        return check_parameters(
-            res,
-            0 <= psi,
-            psi <= 1,
-            0 <= theta,
-            msg="0 <= psi <= 1, theta >= 0",
-        )
-
-    def logcdf(value, psi, theta):
-        """
-        Compute the log of the cumulative distribution function for ZeroInflatedPoisson distribution
-        at the specified value.
-
-        Parameters
-        ----------
-        value: numeric or np.ndarray or aesara.tensor
-            Value(s) for which log CDF is calculated. If the log CDF for multiple
-            values are desired the values must be provided in a numpy array or Aesara tensor.
-
-        Returns
-        -------
-        TensorVariable
-        """
-
-        res = at.switch(
-            at.lt(value, 0),
-            -np.inf,
-            at.logaddexp(
-                at.log1p(-psi),
-                at.log(psi) + logcdf(Poisson.dist(mu=theta), value),
-            ),
-        )
-
-        return check_parameters(
-            res, 0 <= psi, psi <= 1, 0 <= theta, msg="0 <= psi <= 1, theta >= 0"
+    def __new__(cls, name, psi, theta, **kwargs):
+        return _zero_inflated_mixture(
+            name=name, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=theta), **kwargs
         )
 
-
-class ZeroInflatedBinomialRV(RandomVariable):
-    name = "zero_inflated_binomial"
-    ndim_supp = 0
-    ndims_params = [0, 0, 0]
-    dtype = "int64"
-    _print_name = ("ZeroInflatedBinom", "\\operatorname{ZeroInflatedBinom}")
-
     @classmethod
-    def rng_fn(cls, rng, psi, n, p, size):
-        return rng.binomial(n=n, p=p, size=size) * (rng.random(size=size) < psi)
-
-
-zero_inflated_binomial = ZeroInflatedBinomialRV()
+    def dist(cls, psi, theta, **kwargs):
+        return _zero_inflated_mixture(
+            name=None, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=theta), **kwargs
+        )
 
 
-class ZeroInflatedBinomial(Discrete):
+class ZeroInflatedBinomial:
     R"""
     Zero-inflated Binomial log-likelihood.
 
@@ -1594,110 +1519,19 @@ class ZeroInflatedBinomial(Discrete):
 
     """
 
-    rv_op = zero_inflated_binomial
-
-    @classmethod
-    def dist(cls, psi, n, p, *args, **kwargs):
-        psi = at.as_tensor_variable(floatX(psi))
-        n = at.as_tensor_variable(intX(n))
-        p = at.as_tensor_variable(floatX(p))
-        return super().dist([psi, n, p], *args, **kwargs)
-
-    def get_moment(rv, size, psi, n, p):
-        mean = at.round(psi * n * p)
-        if not rv_size_is_none(size):
-            mean = at.full(size, mean)
-        return mean
-
-    def logp(value, psi, n, p):
-        r"""
-        Calculate log-probability of ZeroInflatedBinomial distribution at specified value.
-
-        Parameters
-        ----------
-        value: numeric
-            Value(s) for which log-probability is calculated. If the log probabilities for multiple
-            values are desired the values must be provided in a numpy array or Aesara tensor
-
-        Returns
-        -------
-        TensorVariable
-        """
-
-        res = at.switch(
-            at.gt(value, 0),
-            at.log(psi) + logp(Binomial.dist(n=n, p=p), value),
-            at.logaddexp(at.log1p(-psi), at.log(psi) + n * at.log1p(-p)),
-        )
-
-        res = at.switch(
-            at.lt(value, 0),
-            -np.inf,
-            res,
-        )
-
-        return check_parameters(
-            res,
-            0 <= psi,
-            psi <= 1,
-            0 <= p,
-            p <= 1,
-            msg="0 <= psi <= 1, 0 <= p <= 1",
-        )
-
-    def logcdf(value, psi, n, p):
-        """
-        Compute the log of the cumulative distribution function for ZeroInflatedBinomial distribution
-        at the specified value.
-
-        Parameters
-        ----------
-        value: numeric or np.ndarray or aesara.tensor
-            Value(s) for which log CDF is calculated. If the log CDF for multiple
-            values are desired the values must be provided in a numpy array or Aesara tensor.
-
-        Returns
-        -------
-        TensorVariable
-        """
-        res = at.switch(
-            at.or_(at.lt(value, 0), at.gt(value, n)),
-            -np.inf,
-            at.logaddexp(
-                at.log1p(-psi),
-                at.log(psi) + logcdf(Binomial.dist(n=n, p=p), value),
-            ),
-        )
-
-        return check_parameters(
-            res,
-            0 <= psi,
-            psi <= 1,
-            0 <= p,
-            p <= 1,
-            msg="0 <= psi <= 1, 0 <= p <= 1",
+    def __new__(cls, name, psi, n, p, **kwargs):
+        return _zero_inflated_mixture(
+            name=name, nonzero_p=psi, nonzero_dist=Binomial.dist(n=n, p=p), **kwargs
         )
 
-
-class ZeroInflatedNegBinomialRV(RandomVariable):
-    name = "zero_inflated_neg_binomial"
-    ndim_supp = 0
-    ndims_params = [0, 0, 0]
-    dtype = "int64"
-    _print_name = (
-        "ZeroInflatedNegBinom",
-        "\\operatorname{ZeroInflatedNegBinom}",
-    )
-
     @classmethod
-    def rng_fn(cls, rng, psi, n, p, size):
-        return rng.negative_binomial(n=n, p=p, size=size) * (rng.random(size=size) < psi)
-
-
-zero_inflated_neg_binomial = ZeroInflatedNegBinomialRV()
+    def dist(cls, psi, n, p, **kwargs):
+        return _zero_inflated_mixture(
+            name=None, nonzero_p=psi, nonzero_dist=Binomial.dist(n=n, p=p), **kwargs
+        )
 
 
-class ZeroInflatedNegativeBinomial(Discrete):
+class ZeroInflatedNegativeBinomial:
     R"""
     Zero-Inflated Negative binomial log-likelihood.
 
@@ -1778,91 +1612,21 @@ def ZeroInfNegBinom(a, m, psi, x):
         Alternative number of target success trials (n > 0)
     """
 
-    rv_op = zero_inflated_neg_binomial
-
-    @classmethod
-    def dist(cls, psi, mu=None, alpha=None, p=None, n=None, *args, **kwargs):
-        psi = at.as_tensor_variable(floatX(psi))
-        n, p = NegativeBinomial.get_n_p(mu=mu, alpha=alpha, p=p, n=n)
-        n = at.as_tensor_variable(floatX(n))
-        p = at.as_tensor_variable(floatX(p))
-        return super().dist([psi, n, p], *args, **kwargs)
-
-    def get_moment(rv, size, psi, n, p):
-        mean = at.floor(psi * n * (1 - p) / p)
-        if not rv_size_is_none(size):
-            mean = at.full(size, mean)
-        return mean
-
-    def logp(value, psi, n, p):
-        r"""
-        Calculate log-probability of ZeroInflatedNegativeBinomial distribution at specified value.
-
-        Parameters
-        ----------
-        value: numeric
-            Value(s) for which log-probability is calculated. If the log probabilities for multiple
-            values are desired the values must be provided in a numpy array or Aesara tensor
-
-        Returns
-        -------
-        TensorVariable
-        """
-
-        res = at.switch(
-            at.gt(value, 0),
-            at.log(psi) + logp(NegativeBinomial.dist(n=n, p=p), value),
-            at.logaddexp(at.log1p(-psi), at.log(psi) + n * at.log(p)),
-        )
-
-        res = at.switch(
-            at.lt(value, 0),
-            -np.inf,
-            res,
-        )
-
-        return check_parameters(
-            res,
-            0 <= psi,
-            psi <= 1,
-            0 < n,
-            0 <= p,
-            p <= 1,
-            msg="0 <= psi <= 1, n > 0, 0 <= p <= 1",
-        )
-
-    def logcdf(value, psi, n, p):
-        """
-        Compute the log of the cumulative distribution function for ZeroInflatedNegativeBinomial distribution
-        at the specified value.
-
-        Parameters
-        ----------
-        value: numeric or np.ndarray or aesara.tensor
-            Value(s) for which log CDF is calculated. If the log CDF for multiple
-            values are desired the values must be provided in a numpy array or Aesara tensor.
-
-        Returns
-        -------
-        TensorVariable
-        """
-        res = at.switch(
-            at.lt(value, 0),
-            -np.inf,
-            at.logaddexp(
-                at.log1p(-psi),
-                at.log(psi) + logcdf(NegativeBinomial.dist(n=n, p=p), value),
-            ),
+    def __new__(cls, name, psi, mu=None, alpha=None, p=None, n=None, **kwargs):
+        return _zero_inflated_mixture(
+            name=name,
+            nonzero_p=psi,
+            nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n),
+            **kwargs,
         )
 
-        return check_parameters(
-            res,
-            0 <= psi,
-            psi <= 1,
-            0 < n,
-            0 < p,
-            p <= 1,
-            msg="0 <= psi <= 1, n > 0, 0 < p <= 1",
+    @classmethod
+    def dist(cls, psi, mu=None, alpha=None, p=None, n=None, **kwargs):
+        return _zero_inflated_mixture(
+            name=None,
+            nonzero_p=psi,
+            nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n),
+            **kwargs,
         )
 
 

From 68bb77afc486fe2f308f1e64b694e67b2285ca9e Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Mon, 14 Mar 2022 14:28:15 +0100
Subject: [PATCH 8/8] Rename ZeroInflatedPoisson theta parameter to mu

For consistency with the base Poisson distribution
---
 RELEASE-NOTES.md                         |  1 +
 pymc/distributions/discrete.py           | 28 ++++++++++++------------
 pymc/tests/test_distributions.py         | 16 +++++++-------
 pymc/tests/test_distributions_moments.py |  6 ++---
 pymc/tests/test_sampling.py              |  6 ++---
 5 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index c1e84d9b4e..2615f61e5b 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -74,6 +74,7 @@ All of the above apply to:
   - The function `replace_with_values` function has been added to `gp.utils`.
   - `MarginalSparse` has been renamed `MarginalApprox`.
   - Removed `MixtureSameFamily`. `Mixture` is now capable of handling batched multivariate components (see [#5438](https://github.com/pymc-devs/pymc/pull/5438)).
+  - `ZeroInflatedPoisson` `theta` parameter was renamed to `mu` (see [#5584](https://github.com/pymc-devs/pymc/pull/5584)).
 - ...
 
 ### Expected breaks
diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py
index 4a7c14423a..cadae04b8d 100644
--- a/pymc/distributions/discrete.py
+++ b/pymc/distributions/discrete.py
@@ -1414,8 +1414,8 @@ class ZeroInflatedPoisson:
 
     .. math::
 
-        f(x \mid \psi, \theta) = \left\{ \begin{array}{l}
-            (1-\psi) + \psi e^{-\theta}, \text{if } x = 0 \\
+        f(x \mid \psi, \mu) = \left\{ \begin{array}{l}
+            (1-\psi) + \psi e^{-\mu}, \text{if } x = 0 \\
             \psi \frac{e^{-\theta}\theta^x}{x!}, \text{if } x=1,2,3,\ldots
             \end{array} \right.
 
@@ -1428,13 +1428,13 @@ class ZeroInflatedPoisson:
         plt.style.use('arviz-darkgrid')
         x = np.arange(0, 22)
         psis = [0.7, 0.4]
-        thetas = [8, 4]
-        for psi, theta in zip(psis, thetas):
-            pmf = st.poisson.pmf(x, theta)
+        mus = [8, 4]
+        for psi, mu in zip(psis, mus):
+            pmf = st.poisson.pmf(x, mu)
             pmf[0] = (1 - psi) + pmf[0]
             pmf[1:] =  psi * pmf[1:]
             pmf /= pmf.sum()
-            plt.plot(x, pmf, '-o', label='$\\psi$ = {}, $\\theta$ = {}'.format(psi, theta))
+            plt.plot(x, pmf, '-o', label='$\\psi$ = {}, $\\mu$ = {}'.format(psi, mu))
         plt.xlabel('x', fontsize=12)
         plt.ylabel('f(x)', fontsize=12)
         plt.legend(loc=1)
@@ -1442,28 +1442,28 @@ class ZeroInflatedPoisson:
 
     ========  ==========================
     Support   :math:`x \in \mathbb{N}_0`
-    Mean      :math:`\psi\theta`
-    Variance  :math:`\theta + \frac{1-\psi}{\psi}\theta^2`
+    Mean      :math:`\psi\mu`
+    Variance  :math:`\mu + \frac{1-\psi}{\psi}\mu^2`
     ========  ==========================
 
     Parameters
     ----------
     psi: float
         Expected proportion of Poisson variates (0 < psi < 1)
-    theta: float
+    mu: float
         Expected number of occurrences during the given interval
-        (theta >= 0).
+        (mu >= 0).
     """
 
-    def __new__(cls, name, psi, theta, **kwargs):
+    def __new__(cls, name, psi, mu, **kwargs):
         return _zero_inflated_mixture(
-            name=name, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=theta), **kwargs
+            name=name, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=mu), **kwargs
         )
 
     @classmethod
-    def dist(cls, psi, theta, **kwargs):
+    def dist(cls, psi, mu, **kwargs):
         return _zero_inflated_mixture(
-            name=None, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=theta), **kwargs
+            name=None, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=mu), **kwargs
         )
 
 
diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py
index 2e8e599f1e..27610f456a 100644
--- a/pymc/tests/test_distributions.py
+++ b/pymc/tests/test_distributions.py
@@ -1747,33 +1747,33 @@ def test_constantdist(self):
         self.check_logcdf(Constant, I, {"c": I}, lambda value, c: np.log(value >= c))
 
     def test_zeroinflatedpoisson(self):
-        def logp_fn(value, psi, theta):
+        def logp_fn(value, psi, mu):
             if value == 0:
-                return np.log((1 - psi) * sp.poisson.pmf(0, theta))
+                return np.log((1 - psi) * sp.poisson.pmf(0, mu))
             else:
-                return np.log(psi * sp.poisson.pmf(value, theta))
+                return np.log(psi * sp.poisson.pmf(value, mu))
 
-        def logcdf_fn(value, psi, theta):
-            return np.log((1 - psi) + psi * sp.poisson.cdf(value, theta))
+        def logcdf_fn(value, psi, mu):
+            return np.log((1 - psi) + psi * sp.poisson.cdf(value, mu))
 
         self.check_logp(
             ZeroInflatedPoisson,
             Nat,
-            {"psi": Unit, "theta": Rplus},
+            {"psi": Unit, "mu": Rplus},
             logp_fn,
         )
 
         self.check_logcdf(
             ZeroInflatedPoisson,
             Nat,
-            {"psi": Unit, "theta": Rplus},
+            {"psi": Unit, "mu": Rplus},
             logcdf_fn,
         )
 
         self.check_selfconsistency_discrete_logcdf(
             ZeroInflatedPoisson,
             Nat,
-            {"theta": Rplus, "psi": Unit},
+            {"mu": Rplus, "psi": Unit},
         )
 
     def test_zeroinflatednegativebinomial(self):
diff --git a/pymc/tests/test_distributions_moments.py b/pymc/tests/test_distributions_moments.py
index db1af97b00..0727963a92 100644
--- a/pymc/tests/test_distributions_moments.py
+++ b/pymc/tests/test_distributions_moments.py
@@ -624,7 +624,7 @@ def test_constant_moment(c, size, expected):
 
 
 @pytest.mark.parametrize(
-    "psi, theta, size, expected",
+    "psi, mu, size, expected",
     [
         (0.9, 3.0, None, 3),
         (0.8, 2.9, 5, np.full(5, 2)),
@@ -632,9 +632,9 @@ def test_constant_moment(c, size, expected):
         (0.2, np.arange(1, 5) * 5, (2, 4), np.full((2, 4), np.arange(1, 5))),
     ],
 )
-def test_zero_inflated_poisson_moment(psi, theta, size, expected):
+def test_zero_inflated_poisson_moment(psi, mu, size, expected):
     with Model() as model:
-        ZeroInflatedPoisson("x", psi=psi, theta=theta, size=size)
+        ZeroInflatedPoisson("x", psi=psi, mu=mu, size=size)
     assert_moment_is_expected(model, expected)
 
 
diff --git a/pymc/tests/test_sampling.py b/pymc/tests/test_sampling.py
index c957df19a6..9b4bfe062a 100644
--- a/pymc/tests/test_sampling.py
+++ b/pymc/tests/test_sampling.py
@@ -1128,11 +1128,11 @@ def test_shape_edgecase(self):
 
     def test_zeroinflatedpoisson(self):
         with pm.Model():
-            theta = pm.Beta("theta", alpha=1, beta=1)
+            mu = pm.Beta("mu", alpha=1, beta=1)
             psi = pm.HalfNormal("psi", sd=1)
-            pm.ZeroInflatedPoisson("suppliers", psi=psi, theta=theta, size=20)
+            pm.ZeroInflatedPoisson("suppliers", psi=psi, mu=mu, size=20)
             gen_data = pm.sample_prior_predictive(samples=5000)
-            assert gen_data.prior["theta"].shape == (1, 5000)
+            assert gen_data.prior["mu"].shape == (1, 5000)
             assert gen_data.prior["psi"].shape == (1, 5000)
             assert gen_data.prior["suppliers"].shape == (1, 5000, 20)