From 487dfe21e4199b5f891dc235a45e7fb23cd4f804 Mon Sep 17 00:00:00 2001 From: Muhammad Rebaal <122050424+Muhammad-Rebaal@users.noreply.github.com> Date: Sat, 15 Mar 2025 22:04:23 +0500 Subject: [PATCH 1/3] Implement the rng function --- pymc/distributions/multivariate.py | 55 ++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 04ff858e34..3de192a7ca 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -2354,8 +2354,59 @@ def __call__(self, W, sigma, zero_sum_stdev, size=None, **kwargs): return super().__call__(W, sigma, zero_sum_stdev, size=size, **kwargs) @classmethod - def rng_fn(cls, rng, size, W, sigma, zero_sum_stdev): - raise NotImplementedError("Cannot sample from ICAR prior") + def rng_fn(cls, rng, W, sigma, zero_sum_stdev, size=None): + """Sample from the ICAR distribution. + + The ICAR distribution is a special case of the CAR distribution with alpha=1. + It generates spatial random effects where neighboring areas tend to have + similar values. The precision matrix is the graph Laplacian of W. + + Parameters + ---------- + rng : numpy.random.Generator + Random number generator + W : ndarray + Symmetric adjacency matrix + sigma : float + Standard deviation parameter + zero_sum_stdev : float + Controls how strongly to enforce the zero-sum constraint + size : tuple, optional + Size of the samples to generate + + Returns + ------- + ndarray + Samples from the ICAR distribution + """ + W = np.asarray(W) + N = W.shape[0] + + # Construct the precision matrix (graph Laplacian) + D = np.diag(W.sum(axis=1)) + Q = D - W + + # Add regularization for the zero eigenvalue based on zero_sum_stdev + zero_sum_precision = 1.0 / (zero_sum_stdev * N)**2 + Q_reg = Q + zero_sum_precision * np.ones((N, N)) / N + + # Use eigendecomposition to handle the degenerate covariance + eigvals, eigvecs = np.linalg.eigh(Q_reg) + + # Construct the covariance matrix + cov = eigvecs @ np.diag(1.0 / eigvals) @ eigvecs.T + + # Scale by sigma^2 + cov = sigma**2 * cov + + # Generate samples + mean = np.zeros(N) + + # Handle different size specifications + if size is None: + return rng.multivariate_normal(mean, cov) + else: + return rng.multivariate_normal(mean, cov, size=size) icar = ICARRV() From 3fe35509fc965ebfef200d73d254a99f1fbfeeae Mon Sep 17 00:00:00 2001 From: Muhammad Rebaal <122050424+Muhammad-Rebaal@users.noreply.github.com> Date: Sun, 23 Mar 2025 18:31:55 +0500 Subject: [PATCH 2/3] Modified the rng_fn and added the test --- pymc/distributions/multivariate.py | 189 +++++++++++------------ tests/distributions/test_multivariate.py | 72 ++++++++- 2 files changed, 157 insertions(+), 104 deletions(-) diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 3de192a7ca..4d5bebc2e4 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -2156,56 +2156,56 @@ def make_node(self, rng, size, mu, W, alpha, tau, W_is_valid): return super().make_node(rng, size, mu, W, alpha, tau, W_is_valid) @classmethod - def rng_fn(cls, rng: np.random.RandomState, mu, W, alpha, tau, W_is_valid, size): - """Sample a numeric random variate. - - Implementation of algorithm from paper - Havard Rue, 2001. "Fast sampling of Gaussian Markov random fields," - Journal of the Royal Statistical Society Series B, Royal Statistical Society, - vol. 63(2), pages 325-338. DOI: 10.1111/1467-9868.00288. + def rng_fn(cls, rng, mu, W, alpha, tau, W_is_valid, size=None): + """Sample from the CAR distribution. + + Parameters + ---------- + rng : numpy.random.Generator + Random number generator + mu : ndarray + Mean vector + W : ndarray + Symmetric adjacency matrix + alpha : float + Autoregression parameter (-1 < alpha < 1) + tau : float + Precision parameter (tau > 0) + W_is_valid : bool + Flag indicating whether W is a valid adjacency matrix + size : tuple, optional + Size of the samples to generate + + Returns + ------- + ndarray + Samples from the CAR distribution """ - if not W_is_valid.all(): - raise ValueError("W must be a valid adjacency matrix") + if not W_is_valid: + raise ValueError("W must be a symmetric adjacency matrix") if np.any(alpha >= 1) or np.any(alpha <= -1): raise ValueError("the domain of alpha is: -1 < alpha < 1") - - # TODO: If there are batch dims, even if W was already sparse, - # we will have some expensive dense_from_sparse and sparse_from_dense - # operations that we should avoid. See https://github.com/pymc-devs/pytensor/issues/839 - W = _squeeze_to_ndim(W, 2) - if not scipy.sparse.issparse(W): - W = scipy.sparse.csr_matrix(W) - tau = scipy.sparse.csr_matrix(_squeeze_to_ndim(tau, 0)) - alpha = scipy.sparse.csr_matrix(_squeeze_to_ndim(alpha, 0)) - - s = np.asarray(W.sum(axis=0))[0] - D = scipy.sparse.diags(s) - - Q = tau.multiply(D - alpha.multiply(W)) - - perm_array = scipy.sparse.csgraph.reverse_cuthill_mckee(Q, symmetric_mode=True) - inv_perm = np.argsort(perm_array) - - Q = Q[perm_array, :][:, perm_array] - - Qb = Q.diagonal() - u = 1 - while np.count_nonzero(Q.diagonal(u)) > 0: - Qb = np.vstack((np.pad(Q.diagonal(u), (u, 0), constant_values=(0, 0)), Qb)) - u += 1 - - L = scipy.linalg.cholesky_banded(Qb, lower=False) - - size = tuple(size or ()) - if size: - mu = np.broadcast_to(mu, (*size, mu.shape[-1])) - z = rng.normal(size=mu.shape) - samples = np.empty(z.shape) - for idx in np.ndindex(mu.shape[:-1]): - samples[idx] = scipy.linalg.cho_solve_banded((L, False), z[idx]) + mu[idx][perm_array] - samples = samples[..., inv_perm] - return samples + + W = np.asarray(W) + N = W.shape[0] + + # Construct the precision matrix + D = np.diag(W.sum(axis=1)) + Q = tau * (D - alpha * W) + + # Convert precision to covariance matrix + cov = np.linalg.inv(Q) + + # Generate samples using multivariate_normal with covariance matrix + mean = np.zeros(N) if mu is None else np.asarray(mu) + + return stats.multivariate_normal.rvs( + mean=mean, + cov=cov, + size=size, + random_state=rng + ) car = CARRV() @@ -2342,6 +2342,8 @@ def logp(value, mu, W, alpha, tau, W_is_valid): W_is_valid, msg="-1 < alpha < 1, tau > 0, W is a symmetric adjacency matrix.", ) + + class ICARRV(RandomVariable): @@ -2355,58 +2357,49 @@ def __call__(self, W, sigma, zero_sum_stdev, size=None, **kwargs): @classmethod def rng_fn(cls, rng, W, sigma, zero_sum_stdev, size=None): - """Sample from the ICAR distribution. - - The ICAR distribution is a special case of the CAR distribution with alpha=1. - It generates spatial random effects where neighboring areas tend to have - similar values. The precision matrix is the graph Laplacian of W. - - Parameters - ---------- - rng : numpy.random.Generator - Random number generator - W : ndarray - Symmetric adjacency matrix - sigma : float - Standard deviation parameter - zero_sum_stdev : float - Controls how strongly to enforce the zero-sum constraint - size : tuple, optional - Size of the samples to generate - - Returns - ------- - ndarray - Samples from the ICAR distribution - """ - W = np.asarray(W) - N = W.shape[0] - - # Construct the precision matrix (graph Laplacian) - D = np.diag(W.sum(axis=1)) - Q = D - W - - # Add regularization for the zero eigenvalue based on zero_sum_stdev - zero_sum_precision = 1.0 / (zero_sum_stdev * N)**2 - Q_reg = Q + zero_sum_precision * np.ones((N, N)) / N - - # Use eigendecomposition to handle the degenerate covariance - eigvals, eigvecs = np.linalg.eigh(Q_reg) - - # Construct the covariance matrix - cov = eigvecs @ np.diag(1.0 / eigvals) @ eigvecs.T - - # Scale by sigma^2 - cov = sigma**2 * cov - - # Generate samples - mean = np.zeros(N) - - # Handle different size specifications - if size is None: - return rng.multivariate_normal(mean, cov) - else: - return rng.multivariate_normal(mean, cov, size=size) + """Sample from the ICAR distribution. + + Parameters + ---------- + rng : numpy.random.Generator + Random number generator + W : ndarray + Symmetric adjacency matrix + sigma : float + Standard deviation parameter + zero_sum_stdev : float + Controls how strongly to enforce the zero-sum constraint + size : tuple, optional + Size of the samples to generate + + Returns + ------- + ndarray + Samples from the ICAR distribution + """ + W = np.asarray(W) + N = W.shape[0] + + # Construct the precision matrix (graph Laplacian) + D = np.diag(W.sum(axis=1)) + Q = D - W + + # Add regularization for the zero eigenvalue based on zero_sum_stdev + zero_sum_precision = 1.0 / (zero_sum_stdev * N)**2 + Q_reg = Q + zero_sum_precision * np.ones((N, N)) / N + + # Convert precision to covariance matrix + cov = np.linalg.inv(Q_reg) + + # Generate samples using multivariate_normal with covariance matrix + mean = np.zeros(N) + + return sigma * stats.multivariate_normal.rvs( + mean=mean, + cov=cov, + size=size, + random_state=rng + ) icar = ICARRV() diff --git a/tests/distributions/test_multivariate.py b/tests/distributions/test_multivariate.py index cb4b8520b9..356c5e5be8 100644 --- a/tests/distributions/test_multivariate.py +++ b/tests/distributions/test_multivariate.py @@ -2249,6 +2249,42 @@ def check_draws_match_expected(self): assert np.all(np.abs(draw(x, random_seed=rng) - np.array([0.5, 0, 2.0])) < 0.01) +class TestCAR: + def test_car_rng_fn(self): + """Test the random number generator for the CAR distribution.""" + # Create a simple adjacency matrix for a grid + W = np.array([ + [0, 1, 0, 1], + [1, 0, 1, 0], + [0, 1, 0, 1], + [1, 0, 1, 0] + ], dtype=np.int32) # Explicitly set dtype + + rng = np.random.default_rng(42) + mu = np.array([1.0, 2.0, 3.0, 4.0]) + alpha = 0.7 + tau = 0.5 + + # Generate samples - use W directly instead of tensor + car_dist = pm.CAR.dist(mu=mu, W=W, alpha=alpha, tau=tau) + car_samples = np.array([draw(car_dist, random_seed=rng) for _ in range(1000)]) + + # Test shape + assert car_samples.shape == (1000, 4) + + # Test mean + assert np.allclose(car_samples.mean(axis=0), mu, atol=0.1) + + # Test covariance structure - neighbors should be more correlated + sample_corr = np.corrcoef(car_samples.T) + for i in range(4): + for j in range(4): + if i != j: + # Neighbors should have higher correlation than non-neighbors + if W[i, j] == 1: + assert sample_corr[i, j] > 0 + + class TestICAR(BaseTestDistributionRandom): pymc_dist = pm.ICAR pymc_dist_params = {"W": np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]]), "sigma": 2} @@ -2278,12 +2314,36 @@ def test_icar_logp(self): ).eval(), "logp inaccuracy" def test_icar_rng_fn(self): - W = np.array([[0, 1, 0, 1], [1, 0, 1, 0], [0, 1, 0, 1], [1, 0, 1, 0]]) - - RV = pm.ICAR.dist(W=W) - - with pytest.raises(NotImplementedError, match="Cannot sample from ICAR prior"): - pm.draw(RV) + """Test the random number generator for the ICAR distribution.""" + # Create a simple adjacency matrix for a grid + W = np.array([ + [0, 1, 0, 1], + [1, 0, 1, 0], + [0, 1, 0, 1], + [1, 0, 1, 0] + ], dtype=np.int32) # Explicitly set dtype + + rng = np.random.default_rng(42) + sigma = 2.0 + zero_sum_stdev = 0.001 + + # Use W directly instead of converting to tensor + icar_dist = pm.ICAR.dist(W=W, sigma=sigma, zero_sum_stdev=zero_sum_stdev) + icar_samples = np.array([draw(icar_dist, random_seed=rng) for _ in range(1000)]) + + # Test shape + assert icar_samples.shape == (1000, 4) + + # Test approximate zero-sum constraint + assert np.abs(icar_samples.sum(axis=1).mean()) < 0.1 + + # Test variance scale - expect variance ≈ sigma^2 * (N-1)/N due to constraint + var_scale = (W.shape[0] - 1) / W.shape[0] # Degrees of freedom adjustment + expected_var = sigma**2 * var_scale + observed_var = np.var(icar_samples, axis=1).mean() + + # Use a more generous tolerance to account for the zero sum constraint's impact on variance + assert np.abs(observed_var - expected_var) < 2.0 @pytest.mark.parametrize( "W,msg", From 5d7e99ac3fa99ec21d3fec391983d06bd3cb8592 Mon Sep 17 00:00:00 2001 From: Muhammad Rebaal <122050424+Muhammad-Rebaal@users.noreply.github.com> Date: Sun, 23 Mar 2025 18:59:46 +0500 Subject: [PATCH 3/3] Run the pre-commit --- pymc/distributions/multivariate.py | 44 +++++++++--------------- tests/distributions/test_multivariate.py | 44 ++++++++++-------------- 2 files changed, 36 insertions(+), 52 deletions(-) diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 4d5bebc2e4..a95cb33796 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -2158,7 +2158,7 @@ def make_node(self, rng, size, mu, W, alpha, tau, W_is_valid): @classmethod def rng_fn(cls, rng, mu, W, alpha, tau, W_is_valid, size=None): """Sample from the CAR distribution. - + Parameters ---------- rng : numpy.random.Generator @@ -2175,7 +2175,7 @@ def rng_fn(cls, rng, mu, W, alpha, tau, W_is_valid, size=None): Flag indicating whether W is a valid adjacency matrix size : tuple, optional Size of the samples to generate - + Returns ------- ndarray @@ -2186,26 +2186,21 @@ def rng_fn(cls, rng, mu, W, alpha, tau, W_is_valid, size=None): if np.any(alpha >= 1) or np.any(alpha <= -1): raise ValueError("the domain of alpha is: -1 < alpha < 1") - + W = np.asarray(W) N = W.shape[0] - + # Construct the precision matrix D = np.diag(W.sum(axis=1)) Q = tau * (D - alpha * W) - + # Convert precision to covariance matrix cov = np.linalg.inv(Q) - + # Generate samples using multivariate_normal with covariance matrix mean = np.zeros(N) if mu is None else np.asarray(mu) - - return stats.multivariate_normal.rvs( - mean=mean, - cov=cov, - size=size, - random_state=rng - ) + + return stats.multivariate_normal.rvs(mean=mean, cov=cov, size=size, random_state=rng) car = CARRV() @@ -2342,8 +2337,6 @@ def logp(value, mu, W, alpha, tau, W_is_valid): W_is_valid, msg="-1 < alpha < 1, tau > 0, W is a symmetric adjacency matrix.", ) - - class ICARRV(RandomVariable): @@ -2358,7 +2351,7 @@ def __call__(self, W, sigma, zero_sum_stdev, size=None, **kwargs): @classmethod def rng_fn(cls, rng, W, sigma, zero_sum_stdev, size=None): """Sample from the ICAR distribution. - + Parameters ---------- rng : numpy.random.Generator @@ -2371,7 +2364,7 @@ def rng_fn(cls, rng, W, sigma, zero_sum_stdev, size=None): Controls how strongly to enforce the zero-sum constraint size : tuple, optional Size of the samples to generate - + Returns ------- ndarray @@ -2379,26 +2372,23 @@ def rng_fn(cls, rng, W, sigma, zero_sum_stdev, size=None): """ W = np.asarray(W) N = W.shape[0] - + # Construct the precision matrix (graph Laplacian) D = np.diag(W.sum(axis=1)) Q = D - W - + # Add regularization for the zero eigenvalue based on zero_sum_stdev - zero_sum_precision = 1.0 / (zero_sum_stdev * N)**2 + zero_sum_precision = 1.0 / (zero_sum_stdev * N) ** 2 Q_reg = Q + zero_sum_precision * np.ones((N, N)) / N - + # Convert precision to covariance matrix cov = np.linalg.inv(Q_reg) - + # Generate samples using multivariate_normal with covariance matrix mean = np.zeros(N) - + return sigma * stats.multivariate_normal.rvs( - mean=mean, - cov=cov, - size=size, - random_state=rng + mean=mean, cov=cov, size=size, random_state=rng ) diff --git a/tests/distributions/test_multivariate.py b/tests/distributions/test_multivariate.py index 356c5e5be8..0939c78533 100644 --- a/tests/distributions/test_multivariate.py +++ b/tests/distributions/test_multivariate.py @@ -2253,28 +2253,25 @@ class TestCAR: def test_car_rng_fn(self): """Test the random number generator for the CAR distribution.""" # Create a simple adjacency matrix for a grid - W = np.array([ - [0, 1, 0, 1], - [1, 0, 1, 0], - [0, 1, 0, 1], - [1, 0, 1, 0] - ], dtype=np.int32) # Explicitly set dtype - + W = np.array( + [[0, 1, 0, 1], [1, 0, 1, 0], [0, 1, 0, 1], [1, 0, 1, 0]], dtype=np.int32 + ) # Explicitly set dtype + rng = np.random.default_rng(42) mu = np.array([1.0, 2.0, 3.0, 4.0]) alpha = 0.7 tau = 0.5 - + # Generate samples - use W directly instead of tensor car_dist = pm.CAR.dist(mu=mu, W=W, alpha=alpha, tau=tau) car_samples = np.array([draw(car_dist, random_seed=rng) for _ in range(1000)]) - + # Test shape assert car_samples.shape == (1000, 4) - + # Test mean assert np.allclose(car_samples.mean(axis=0), mu, atol=0.1) - + # Test covariance structure - neighbors should be more correlated sample_corr = np.corrcoef(car_samples.T) for i in range(4): @@ -2316,34 +2313,31 @@ def test_icar_logp(self): def test_icar_rng_fn(self): """Test the random number generator for the ICAR distribution.""" # Create a simple adjacency matrix for a grid - W = np.array([ - [0, 1, 0, 1], - [1, 0, 1, 0], - [0, 1, 0, 1], - [1, 0, 1, 0] - ], dtype=np.int32) # Explicitly set dtype - - rng = np.random.default_rng(42) + W = np.array( + [[0, 1, 0, 1], [1, 0, 1, 0], [0, 1, 0, 1], [1, 0, 1, 0]], dtype=np.int32 + ) # Explicitly set dtype + + rng = np.random.default_rng(42) sigma = 2.0 zero_sum_stdev = 0.001 - + # Use W directly instead of converting to tensor icar_dist = pm.ICAR.dist(W=W, sigma=sigma, zero_sum_stdev=zero_sum_stdev) icar_samples = np.array([draw(icar_dist, random_seed=rng) for _ in range(1000)]) - + # Test shape assert icar_samples.shape == (1000, 4) - + # Test approximate zero-sum constraint assert np.abs(icar_samples.sum(axis=1).mean()) < 0.1 - + # Test variance scale - expect variance ≈ sigma^2 * (N-1)/N due to constraint var_scale = (W.shape[0] - 1) / W.shape[0] # Degrees of freedom adjustment expected_var = sigma**2 * var_scale observed_var = np.var(icar_samples, axis=1).mean() - + # Use a more generous tolerance to account for the zero sum constraint's impact on variance - assert np.abs(observed_var - expected_var) < 2.0 + assert np.abs(observed_var - expected_var) < 2.0 @pytest.mark.parametrize( "W,msg",