diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 6e1f0b46a4..0e45233ba7 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -22,6 +22,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang
 - Add `logcdf` method to all univariate discrete distributions (see [#4387](https://github.com/pymc-devs/pymc3/pull/4387)).
 - Add `random` method to `MvGaussianRandomWalk` (see [#4388](https://github.com/pymc-devs/pymc3/pull/4388))
 - `AsymmetricLaplace` distribution added (see [#4392](https://github.com/pymc-devs/pymc3/pull/4392)).
+- `DirichletMultinomial` distribution added (see [#4373](https://github.com/pymc-devs/pymc3/pull/4373)).
 
 ### Maintenance
 - Fixed bug whereby partial traces returned after keyboard interrupt during parallel sampling had fewer draws than would've been available [#4318](https://github.com/pymc-devs/pymc3/pull/4318)
diff --git a/docs/source/api/distributions/multivariate.rst b/docs/source/api/distributions/multivariate.rst
index 0e91f1b499..54122ae1db 100644
--- a/docs/source/api/distributions/multivariate.rst
+++ b/docs/source/api/distributions/multivariate.rst
@@ -14,6 +14,7 @@ Multivariate
    LKJCorr
    Multinomial
    Dirichlet
+   DirichletMultinomial
 
 .. automodule:: pymc3.distributions.multivariate
    :members:
diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
index afbdf76b04..51958f541b 100644
--- a/pymc3/distributions/__init__.py
+++ b/pymc3/distributions/__init__.py
@@ -81,6 +81,7 @@
 from pymc3.distributions.mixture import Mixture, MixtureSameFamily, NormalMixture
 from pymc3.distributions.multivariate import (
     Dirichlet,
+    DirichletMultinomial,
     KroneckerNormal,
     LKJCholeskyCov,
     LKJCorr,
@@ -155,6 +156,7 @@
     "MvStudentT",
     "Dirichlet",
     "Multinomial",
+    "DirichletMultinomial",
     "Wishart",
     "WishartBartlett",
     "LKJCholeskyCov",
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index 5a4d1cf992..e5c73179d9 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -42,15 +42,17 @@
 )
 from pymc3.distributions.shape_utils import broadcast_dist_samples_to, to_tuple
 from pymc3.distributions.special import gammaln, multigammaln
+from pymc3.exceptions import ShapeError
 from pymc3.math import kron_diag, kron_dot, kron_solve_lower, kronecker
 from pymc3.model import Deterministic
-from pymc3.theanof import floatX
+from pymc3.theanof import floatX, intX
 
 __all__ = [
     "MvNormal",
     "MvStudentT",
     "Dirichlet",
     "Multinomial",
+    "DirichletMultinomial",
     "Wishart",
     "WishartBartlett",
     "LKJCorr",
@@ -690,6 +692,160 @@ def logp(self, x):
         )
 
 
+class DirichletMultinomial(Discrete):
+    R"""Dirichlet Multinomial log-likelihood.
+
+    Dirichlet mixture of Multinomials distribution, with a marginalized PMF.
+
+    .. math::
+
+        f(x \mid n, a) = \frac{\Gamma(n + 1)\Gamma(\sum a_k)}
+                              {\Gamma(n + \sum a_k)}
+                         \prod_{k=1}^K
+                         \frac{\Gamma(x_k + a_k)}
+                              {\Gamma(x_k + 1)\Gamma(a_k)}
+
+    ==========  ===========================================
+    Support     :math:`x \in \{0, 1, \ldots, n\}` such that
+                :math:`\sum x_i = n`
+    Mean        :math:`n \frac{a_i}{\sum a_k}`
+    ==========  ===========================================
+
+    Parameters
+    ----------
+    n : int or array
+        Total counts in each replicate. If n is an array, its shape must be (N,)
+        with N = a.shape[0].
+
+    a : one- or two-dimensional array
+        Dirichlet parameter. Elements must be strictly positive.
+        The number of categories is given by the length of the last axis.
+
+    shape : integer tuple
+        Describes shape of distribution.
+        For example, if n=array([5, 10]) and
+        a=array([1, 1, 1]), shape should be (2, 3).
+    """
+
+    def __init__(self, n, a, shape, *args, **kwargs):
+        super().__init__(shape=shape, defaults=("_defaultval",), *args, **kwargs)
+
+        n = intX(n)
+        a = floatX(a)
+        if len(self.shape) > 1:
+            self.n = tt.shape_padright(n)
+            self.a = tt.as_tensor_variable(a) if a.ndim > 1 else tt.shape_padleft(a)
+        else:
+            # n is a scalar, a is a 1d array
+            self.n = tt.as_tensor_variable(n)
+            self.a = tt.as_tensor_variable(a)
+
+        p = self.a / self.a.sum(-1, keepdims=True)
+
+        self.mean = self.n * p
+        # Mode is only an approximation. Exact computation requires a complex
+        # iterative algorithm as described in https://doi.org/10.1016/j.spl.2009.09.013
+        mode = tt.cast(tt.round(self.mean), "int32")
+        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
+        inc_bool_arr = tt.abs_(diff) > 0
+        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
+        self._defaultval = mode
+
+    def _random(self, n, a, size=None):
+        # numpy will cast dirichlet and multinomial samples to float64 by default
+        original_dtype = a.dtype
+
+        # Thanks to the default shape handling done in generate_samples, the last
+        # axis of n is a dummy axis that allows it to broadcast well with `a`
+        n = np.broadcast_to(n, size)
+        a = np.broadcast_to(a, size)
+        n = n[..., 0]
+
+        # np.random.multinomial needs `n` to be a scalar int and `a` a
+        # sequence, so we semi-flatten them and iterate over them
+        n_ = n.reshape([-1])
+        a_ = a.reshape([-1, a.shape[-1]])
+        p_ = np.array([np.random.dirichlet(aa) for aa in a_])
+        samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)])
+        samples = samples.reshape(a.shape)
+
+        # We cast back to the original dtype
+        return samples.astype(original_dtype)
+
+    def random(self, point=None, size=None):
+        """
+        Draw random values from Dirichlet-Multinomial distribution.
+
+        Parameters
+        ----------
+        point: dict, optional
+            Dict of variable values on which random values are to be
+            conditioned (uses default point if not specified).
+        size: int, optional
+            Desired size of random sample (returns one sample if not
+            specified).
+
+        Returns
+        -------
+        array
+        """
+        n, a = draw_values([self.n, self.a], point=point, size=size)
+        samples = generate_samples(
+            self._random,
+            n,
+            a,
+            dist_shape=self.shape,
+            size=size,
+        )
+
+        # If the distribution is initialized with .dist(), a valid init shape is
+        # not asserted. Under normal use in a model context, a valid init shape
+        # is asserted at start.
+        expected_shape = to_tuple(size) + to_tuple(self.shape)
+        sample_shape = tuple(samples.shape)
+        if sample_shape != expected_shape:
+            raise ShapeError(
+                f"Expected sample shape was {expected_shape} but got {sample_shape}. "
+                "This may reflect an invalid initialization shape."
+            )
+
+        return samples
+
+    def logp(self, value):
+        """
+        Calculate log-probability of DirichletMultinomial distribution
+        at specified value.
+
+        Parameters
+        ----------
+        value: integer array
+            Value for which log-probability is calculated.
+
+        Returns
+        -------
+        TensorVariable
+        """
+        a = self.a
+        n = self.n
+        sum_a = a.sum(axis=-1, keepdims=True)
+
+        const = (gammaln(n + 1) + gammaln(sum_a)) - gammaln(n + sum_a)
+        series = gammaln(value + a) - (gammaln(value + 1) + gammaln(a))
+        result = const + series.sum(axis=-1, keepdims=True)
+        # Bounds checking to confirm parameters and data meet all constraints
+        # and that each observation value_i sums to n_i.
+        return bound(
+            result,
+            tt.all(tt.ge(value, 0)),
+            tt.all(tt.gt(a, 0)),
+            tt.all(tt.ge(n, 0)),
+            tt.all(tt.eq(value.sum(axis=-1, keepdims=True), n)),
+            broadcast_conditions=False,
+        )
+
+    def _distr_parameters_for_repr(self):
+        return ["n", "a"]
+
+
 def posdef(AA):
     try:
         linalg.cholesky(AA)
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index dcfceee441..e15996e7ab 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -47,6 +47,7 @@
     Constant,
     DensityDist,
     Dirichlet,
+    DirichletMultinomial,
     DiscreteUniform,
     DiscreteWeibull,
     ExGaussian,
@@ -278,6 +279,21 @@ def multinomial_logpdf(value, n, p):
         return -inf
 
 
+def dirichlet_multinomial_logpmf(value, n, a):
+    value, n, a = [np.asarray(x) for x in [value, n, a]]
+    assert value.ndim == 1
+    assert n.ndim == 0
+    assert a.shape == value.shape
+    gammaln = scipy.special.gammaln
+    if value.sum() == n and (0 <= value).all() and (value <= n).all():
+        sum_a = a.sum(axis=-1)
+        const = gammaln(n + 1) + gammaln(sum_a) - gammaln(n + sum_a)
+        series = gammaln(value + a) - gammaln(value + 1) - gammaln(a)
+        return const + series.sum(axis=-1)
+    else:
+        return -inf
+
+
 def beta_mu_sigma(value, mu, sigma):
     kappa = mu * (1 - mu) / sigma ** 2 - 1
     if kappa > 0:
@@ -1748,6 +1764,145 @@ def test_batch_multinomial(self):
         sample = dist.random(size=2)
         assert_allclose(sample, np.stack([vals, vals], axis=0))
 
+    @pytest.mark.parametrize("n", [2, 3])
+    def test_dirichlet_multinomial(self, n):
+        self.pymc3_matches_scipy(
+            DirichletMultinomial,
+            Vector(Nat, n),
+            {"a": Vector(Rplus, n), "n": Nat},
+            dirichlet_multinomial_logpmf,
+        )
+
+    def test_dirichlet_multinomial_matches_beta_binomial(self):
+        a, b, n = 2, 1, 5
+        ns = np.arange(n + 1)
+        ns_dm = np.vstack((ns, n - ns)).T  # convert ns=1 to ns_dm=[1, 4], for all ns...
+        bb_logp = pm.BetaBinomial.dist(n=n, alpha=a, beta=b).logp(ns).tag.test_value
+        dm_logp = (
+            pm.DirichletMultinomial.dist(n=n, a=[a, b], shape=(1, 2)).logp(ns_dm).tag.test_value
+        )
+        dm_logp = dm_logp.ravel()
+        assert_almost_equal(
+            dm_logp,
+            bb_logp,
+            decimal=select_by_precision(float64=6, float32=3),
+        )
+
+    @pytest.mark.parametrize(
+        "a, n, shape",
+        [
+            [[0.25, 0.25, 0.25, 0.25], 1, (1, 4)],
+            [[0.3, 0.6, 0.05, 0.05], 2, (1, 4)],
+            [[0.3, 0.6, 0.05, 0.05], 10, (1, 4)],
+            [[0.25, 0.25, 0.25, 0.25], 1, (2, 4)],
+            [[0.3, 0.6, 0.05, 0.05], 2, (3, 4)],
+            [[[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]], [1, 10], (2, 4)],
+        ],
+    )
+    def test_dirichlet_multinomial_defaultval(self, a, n, shape):
+        a = np.asarray(a)
+        with Model() as model:
+            m = DirichletMultinomial("m", n=n, a=a, shape=shape)
+        assert_allclose(m.distribution._defaultval.eval().sum(axis=-1), n)
+
+    def test_dirichlet_multinomial_vec(self):
+        vals = np.array([[2, 4, 4], [3, 3, 4]])
+        a = np.array([0.2, 0.3, 0.5])
+        n = 10
+
+        with Model() as model_single:
+            DirichletMultinomial("m", n=n, a=a, shape=len(a))
+
+        with Model() as model_many:
+            DirichletMultinomial("m", n=n, a=a, shape=vals.shape)
+
+        assert_almost_equal(
+            np.asarray([dirichlet_multinomial_logpmf(v, n, a) for v in vals]),
+            np.asarray([model_single.fastlogp({"m": val}) for val in vals]),
+            decimal=4,
+        )
+
+        assert_almost_equal(
+            np.asarray([dirichlet_multinomial_logpmf(v, n, a) for v in vals]),
+            model_many.free_RVs[0].logp_elemwise({"m": vals}).squeeze(),
+            decimal=4,
+        )
+
+        assert_almost_equal(
+            sum([model_single.fastlogp({"m": val}) for val in vals]),
+            model_many.fastlogp({"m": vals}),
+            decimal=4,
+        )
+
+    def test_dirichlet_multinomial_vec_1d_n(self):
+        vals = np.array([[2, 4, 4], [4, 3, 4]])
+        a = np.array([0.2, 0.3, 0.5])
+        ns = np.array([10, 11])
+
+        with Model() as model:
+            DirichletMultinomial("m", n=ns, a=a, shape=vals.shape)
+
+        assert_almost_equal(
+            sum([dirichlet_multinomial_logpmf(val, n, a) for val, n in zip(vals, ns)]),
+            model.fastlogp({"m": vals}),
+            decimal=4,
+        )
+
+    def test_dirichlet_multinomial_vec_1d_n_2d_a(self):
+        vals = np.array([[2, 4, 4], [4, 3, 4]])
+        as_ = np.array([[0.2, 0.3, 0.5], [0.9, 0.09, 0.01]])
+        ns = np.array([10, 11])
+
+        with Model() as model:
+            DirichletMultinomial("m", n=ns, a=as_, shape=vals.shape)
+
+        assert_almost_equal(
+            sum([dirichlet_multinomial_logpmf(val, n, a) for val, n, a in zip(vals, ns, as_)]),
+            model.fastlogp({"m": vals}),
+            decimal=4,
+        )
+
+    def test_dirichlet_multinomial_vec_2d_a(self):
+        vals = np.array([[2, 4, 4], [3, 3, 4]])
+        as_ = np.array([[0.2, 0.3, 0.5], [0.3, 0.3, 0.4]])
+        n = 10
+
+        with Model() as model:
+            DirichletMultinomial("m", n=n, a=as_, shape=vals.shape)
+
+        assert_almost_equal(
+            sum([dirichlet_multinomial_logpmf(val, n, a) for val, a in zip(vals, as_)]),
+            model.fastlogp({"m": vals}),
+            decimal=4,
+        )
+
+    def test_batch_dirichlet_multinomial(self):
+        # Test that DM can handle a 3d array for `a`
+
+        # Create an almost deterministic DM by setting a to 0.001, everywhere
+        # except for one category / dimension which is given the value of 1000
+        n = 5
+        vals = np.zeros((4, 5, 3), dtype="int32")
+        a = np.zeros_like(vals, dtype=theano.config.floatX) + 0.001
+        inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None]
+        np.put_along_axis(vals, inds, n, axis=-1)
+        np.put_along_axis(a, inds, 1000, axis=-1)
+
+        dist = DirichletMultinomial.dist(n=n, a=a, shape=vals.shape)
+
+        # Logp should be approx -9.924431e-06
+        dist_logp = dist.logp(vals).tag.test_value
+
+        expected_logp = np.full(shape=vals.shape[:-1] + (1,), fill_value=-9.924431e-06)
+        assert_almost_equal(
+            dist_logp,
+            expected_logp,
+            decimal=select_by_precision(float64=6, float32=3),
+        )
+
+        # Samples should be equal given the almost deterministic DM
+        sample = dist.random(size=2)
+        assert_allclose(sample, np.stack([vals, vals], axis=0))
+
     def test_categorical_bounds(self):
         with Model():
             x = Categorical("x", p=np.array([0.2, 0.3, 0.5]))
@@ -2096,6 +2251,9 @@ def setup_class(self):
                 shape=(n, n),
             )
 
+            # DirichletMultinomial
+            dm = DirichletMultinomial("dm", n=5, a=[1, 1, 1], shape=(2, 3))
+
             # Likelihood (sampling distribution) of observations
             Y_obs = Normal("Y_obs", mu=mu, sigma=sigma, observed=Y)
 
@@ -2113,6 +2271,7 @@
                 r"$\text{bound_var} \sim \text{Bound}$ -- \text{Normal}$",
                 r"$\text{kron_normal} \sim \text{KroneckerNormal}$",
                 r"$\text{mat_normal} \sim \text{MatrixNormal}$",
+                r"$\text{dm} \sim \text{DirichletMultinomial}$",
             ),
             "plain": (
                 r"alpha ~ Normal",
@@ -2126,6 +2285,7 @@
                 r"bound_var ~ Bound-Normal",
                 r"kron_normal ~ KroneckerNormal",
                 r"mat_normal ~ MatrixNormal",
+                r"dm ~ DirichletMultinomial",
             ),
             "latex_with_params": (
                 r"$\text{alpha} \sim \text{Normal}(\mathit{mu}=0.0,~\mathit{sigma}=10.0)$",
@@ -2139,6 +2299,7 @@
                 r"$\text{bound_var} \sim \text{Bound}(\mathit{lower}=1.0,~\mathit{upper}=\text{None})$ -- \text{Normal}(\mathit{mu}=0.0,~\mathit{sigma}=10.0)$",
                 r"$\text{kron_normal} \sim \text{KroneckerNormal}(\mathit{mu}=array)$",
                 r"$\text{mat_normal} \sim \text{MatrixNormal}(\mathit{mu}=array,~\mathit{rowcov}=array,~\mathit{colchol_cov}=array)$",
+                r"$\text{dm} \sim \text{DirichletMultinomial}(\mathit{n}=5,~\mathit{a}=array)$",
             ),
             "plain_with_params": (
                 r"alpha ~ Normal(mu=0.0, sigma=10.0)",
@@ -2152,6 +2313,7 @@
                 r"bound_var ~ Bound(lower=1.0, upper=None)-Normal(mu=0.0, sigma=10.0)",
                 r"kron_normal ~ KroneckerNormal(mu=array)",
                 r"mat_normal ~ MatrixNormal(mu=array, rowcov=array, colchol_cov=array)",
+                r"dm ~ DirichletMultinomial(n=5, a=array)",
             ),
         }
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index 6cc4fe8042..2513cd3029 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -15,6 +15,8 @@
 import itertools
 import sys
 
+from contextlib import ExitStack as does_not_raise
+
 import numpy as np
 import numpy.random as nr
 import numpy.testing as npt
@@ -34,6 +36,7 @@
     draw_values,
     to_tuple,
 )
+from pymc3.exceptions import ShapeError
 from pymc3.tests.helpers import SeededTest
 from pymc3.tests.test_distributions import (
     Domain,
@@ -1005,6 +1008,70 @@ def ref_rand(size, a):
             ref_rand=ref_rand,
         )
 
+    def test_dirichlet_multinomial(self):
+        def ref_rand(size, a, n):
+            k = a.shape[-1]
+            out = np.empty((size, k), dtype=int)
+            for i in range(size):
+                p = nr.dirichlet(a)
+                x = nr.multinomial(n=n, pvals=p)
+                out[i, :] = x
+            return out
+
+        for n in [2, 3]:
+            pymc3_random_discrete(
+                pm.DirichletMultinomial,
+                {"a": Vector(Rplus, n), "n": Nat},
+                valuedomain=Vector(Nat, n),
+                size=1000,
+                ref_rand=ref_rand,
+            )
+
+    @pytest.mark.parametrize(
+        "a, shape, n",
+        [
+            [[0.25, 0.25, 0.25, 0.25], 4, 2],
+            [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
+            [[0.25, 0.25, 0.25, 0.25], (10, 4), [2] * 10],
+            [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
+            [[[0.25, 0.25, 0.25, 0.25]], (2, 4), [7, 11]],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
+            [
+                [[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]],
+                (10, 2, 4),
+                [31, 37],
+            ],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
+        ],
+    )
+    def test_dirichlet_multinomial_shape(self, a, shape, n):
+        a = np.asarray(a)
+        with pm.Model() as model:
+            m = pm.DirichletMultinomial("m", n=n, a=a, shape=shape)
+
+        samp0 = m.random()
+        samp1 = m.random(size=1)
+        samp2 = m.random(size=2)
+
+        shape_ = to_tuple(shape)
+        assert to_tuple(samp0.shape) == shape_
+        assert to_tuple(samp1.shape) == (1, *shape_)
+        assert to_tuple(samp2.shape) == (2, *shape_)
+
+    @pytest.mark.parametrize(
+        "n, a, shape, expectation",
+        [
+            ([5], [[1000, 1, 1], [1, 1, 1000]], (2, 3), does_not_raise()),
+            ([5, 3], [[1000, 1, 1], [1, 1, 1000]], (2, 3), does_not_raise()),
+            ([[5]], [[1000, 1, 1], [1, 1, 1000]], (2, 3), pytest.raises(ShapeError)),
+            ([[5], [3]], [[1000, 1, 1], [1, 1, 1000]], (2, 3), pytest.raises(ShapeError)),
+        ],
+    )
+    def test_dirichlet_multinomial_dist_ShapeError(self, n, a, shape, expectation):
+        m = pm.DirichletMultinomial.dist(n=n, a=a, shape=shape)
+        with expectation:
+            m.random()
+
     def test_multinomial(self):
         def ref_rand(size, p, n):
             return nr.multinomial(pvals=p, n=n, size=size)
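
Reviewer note (not part of the patch): a minimal end-to-end sketch of the new distribution, assuming only the API added above (`n`, `a`, `shape`, plus the usual `observed` machinery). The count data, prior choice, and sampler settings below are invented for illustration.

    import numpy as np
    import pymc3 as pm

    # Hypothetical data: 5 replicates of n=20 trials over 3 categories.
    counts = np.array([[7, 6, 7], [12, 3, 5], [8, 8, 4], [11, 4, 5], [9, 6, 5]])

    with pm.Model():
        # Per-category Dirichlet concentrations; must be strictly positive.
        a = pm.Exponential("a", lam=1.0, shape=3)
        # `shape` must match the observed counts, as the docstring describes.
        pm.DirichletMultinomial("obs", n=20, a=a, shape=counts.shape, observed=counts)
        idata = pm.sample(draws=500, tune=500, return_inferencedata=True)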
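Reviewer note (not part of the patch): the marginalized PMF in the docstring is easy to sanity-check outside pymc3. The sketch below re-implements it with numpy (the helper name `dm_logpmf` is made up) and, for the two-category case, compares it against `scipy.stats.betabinom` (scipy >= 1.4), which is the same reduction exercised by `test_dirichlet_multinomial_matches_beta_binomial`.

    import numpy as np
    from scipy.special import gammaln
    from scipy.stats import betabinom

    def dm_logpmf(x, n, a):
        # Marginalized Dirichlet-Multinomial log-PMF from the docstring.
        x, a = np.asarray(x), np.asarray(a)
        const = gammaln(n + 1) + gammaln(a.sum()) - gammaln(n + a.sum())
        series = gammaln(x + a) - gammaln(x + 1) - gammaln(a)
        return const + series.sum()

    # With K=2 categories, DM(n, a=[a1, a2]) reduces to BetaBinomial(n, a1, a2).
    a1, a2, n = 2.0, 1.0, 5
    for k in range(n + 1):
        np.testing.assert_allclose(
            dm_logpmf([k, n - k], n, [a1, a2]),
            betabinom.logpmf(k, n, a1, a2),
        )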