Fix MatrixNormal.random (#4368)

Sayam753 · web-flow · commit 91993d880d3e · 2020-12-30T16:58:49.000+01:00
* Fix MatrixNormal.random

* Provided better context for tests

* Worked on suggestions

* Added a test to signify need of transpose of cholesky matrix.

* Used np.swapaxes to take transpose

* Given a mention in release notes
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -27,6 +27,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang
 - `math.logsumexp` now matches `scipy.special.logsumexp` when arrays contain infinite values (see [#4360](https://github.com/pymc-devs/pymc3/pull/4360)).
 - Fixed mathematical formulation in `MvStudentT` random method. (see [#4359](https://github.com/pymc-devs/pymc3/pull/4359))
 - Fix issue in `logp` method of `HyperGeometric`. It now returns `-inf` for invalid parameters (see [4367](https://github.com/pymc-devs/pymc3/pull/4367))
+- Fixed `MatrixNormal` random method to work with parameters as random variables. (see [#4368](https://github.com/pymc-devs/pymc3/pull/4368))
 
 ## PyMC3 3.10.0 (7 December 2020)
 
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
@@ -1445,7 +1445,7 @@ class MatrixNormal(Continuous):
 
     .. math::
        f(x \mid \mu, U, V) =
-           \frac{1}{(2\pi |U|^n |V|^m)^{1/2}}
+           \frac{1}{(2\pi^{m n} |U|^n |V|^m)^{1/2}}
            \exp\left\{
                 -\frac{1}{2} \mathrm{Tr}[ V^{-1} (x-\mu)^{\prime} U^{-1} (x-\mu)]
             \right\}
@@ -1637,27 +1637,19 @@ def random(self, point=None, size=None):
         mu, colchol, rowchol = draw_values(
             [self.mu, self.colchol_cov, self.rowchol_cov], point=point, size=size
         )
-        if size is None:
-            size = ()
-        if size in (None, ()):
-            standard_normal = np.random.standard_normal((self.shape[0], colchol.shape[-1]))
-            samples = mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T))
-        else:
-            samples = []
-            size = tuple(np.atleast_1d(size))
-            if mu.shape == tuple(self.shape):
-                for _ in range(np.prod(size)):
-                    standard_normal = np.random.standard_normal((self.shape[0], colchol.shape[-1]))
-                    samples.append(mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T)))
-            else:
-                for j in range(np.prod(size)):
-                    standard_normal = np.random.standard_normal(
-                        (self.shape[0], colchol[j].shape[-1])
-                    )
-                    samples.append(
-                        mu[j] + np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T))
-                    )
-            samples = np.array(samples).reshape(size + tuple(self.shape))
+        size = to_tuple(size)
+        dist_shape = to_tuple(self.shape)
+        output_shape = size + dist_shape
+
+        # Broadcasting all parameters
+        (mu,) = broadcast_dist_samples_to(to_shape=output_shape, samples=[mu], size=size)
+        rowchol = np.broadcast_to(rowchol, shape=size + rowchol.shape[-2:])
+
+        colchol = np.broadcast_to(colchol, shape=size + colchol.shape[-2:])
+        colchol = np.swapaxes(colchol, -1, -2)  # Take transpose
+
+        standard_normal = np.random.standard_normal(output_shape)
+        samples = mu + np.matmul(rowchol, np.matmul(standard_normal, colchol))
         return samples
 
     def _trquaddist(self, value):
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
@@ -849,6 +849,12 @@ def ref_rand_chol(size, mu, rowchol, colchol):
                 size, mu, rowcov=np.dot(rowchol, rowchol.T), colcov=np.dot(colchol, colchol.T)
             )
 
+        def ref_rand_chol_transpose(size, mu, rowchol, colchol):
+            colchol = colchol.T
+            return ref_rand(
+                size, mu, rowcov=np.dot(rowchol, rowchol.T), colcov=np.dot(colchol, colchol.T)
+            )
+
         def ref_rand_uchol(size, mu, rowchol, colchol):
             return ref_rand(
                 size, mu, rowcov=np.dot(rowchol.T, rowchol), colcov=np.dot(colchol.T, colchol)
@@ -858,7 +864,7 @@ def ref_rand_uchol(size, mu, rowchol, colchol):
             pymc3_random(
                 pm.MatrixNormal,
                 {"mu": RealMatrix(n, n), "rowcov": PdMatrix(n), "colcov": PdMatrix(n)},
-                size=n,
+                size=100,
                 valuedomain=RealMatrix(n, n),
                 ref_rand=ref_rand,
             )
@@ -867,7 +873,7 @@ def ref_rand_uchol(size, mu, rowchol, colchol):
             pymc3_random(
                 pm.MatrixNormal,
                 {"mu": RealMatrix(n, n), "rowchol": PdMatrixChol(n), "colchol": PdMatrixChol(n)},
-                size=n,
+                size=100,
                 valuedomain=RealMatrix(n, n),
                 ref_rand=ref_rand_chol,
             )
@@ -878,6 +884,22 @@ def ref_rand_uchol(size, mu, rowchol, colchol):
             #     extra_args={'lower': False}
             # )
 
+            # 2 sample test fails because cov becomes different if chol is transposed beforehand.
+            # This implicity means we need transpose of chol after drawing values in
+            # MatrixNormal.random method to match stats.matrix_normal.rvs method
+            with pytest.raises(AssertionError):
+                pymc3_random(
+                    pm.MatrixNormal,
+                    {
+                        "mu": RealMatrix(n, n),
+                        "rowchol": PdMatrixChol(n),
+                        "colchol": PdMatrixChol(n),
+                    },
+                    size=100,
+                    valuedomain=RealMatrix(n, n),
+                    ref_rand=ref_rand_chol_transpose,
+                )
+
     def test_kronecker_normal(self):
         def ref_rand(size, mu, covs, sigma):
             cov = pm.math.kronecker(covs[0], covs[1]).eval()
@@ -1675,3 +1697,26 @@ def test_issue_3706(self):
             prior_pred = pm.sample_prior_predictive(1)
 
         assert prior_pred["X"].shape == (1, N, 2)
+
+
+def test_matrix_normal_random_with_random_variables():
+    """
+    This test checks for shape correctness when using MatrixNormal distribution
+    with parameters as random variables.
+    Originally reported - https://github.com/pymc-devs/pymc3/issues/3585
+    """
+    K = 3
+    D = 15
+    mu_0 = np.zeros((D, K))
+    lambd = 1.0
+    with pm.Model() as model:
+        sd_dist = pm.HalfCauchy.dist(beta=2.5)
+        packedL = pm.LKJCholeskyCov("packedL", eta=2, n=D, sd_dist=sd_dist)
+        L = pm.expand_packed_triangular(D, packedL, lower=True)
+        Sigma = pm.Deterministic("Sigma", L.dot(L.T))  # D x D covariance
+        mu = pm.MatrixNormal(
+            "mu", mu=mu_0, rowcov=(1 / lambd) * Sigma, colcov=np.eye(K), shape=(D, K)
+        )
+        prior = pm.sample_prior_predictive(2)
+
+    assert prior["mu"].shape == (2, D, K)