From 063eea618fc79cef18350b818b4ac722d78ac21c Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Thu, 29 Nov 2018 08:01:11 -0500 Subject: [PATCH 1/7] switching travis badges to svg --- README.rst | 2 +- docs/source/semantic_sphinx/layout.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 5075fb02b8..24ed237ee6 100644 --- a/README.rst +++ b/README.rst @@ -181,7 +181,7 @@ Sponsors .. |Binder| image:: https://mybinder.org/badge.svg :target: https://mybinder.org/v2/gh/pymc-devs/pymc3/master?filepath=%2Fdocs%2Fsource%2Fnotebooks -.. |Build Status| image:: https://travis-ci.org/pymc-devs/pymc3.png?branch=master +.. |Build Status| image:: https://travis-ci.org/pymc-devs/pymc3.svg?branch=master :target: https://travis-ci.org/pymc-devs/pymc3 .. |Coverage| image:: https://coveralls.io/repos/github/pymc-devs/pymc3/badge.svg?branch=master :target: https://coveralls.io/github/pymc-devs/pymc3?branch=master diff --git a/docs/source/semantic_sphinx/layout.html b/docs/source/semantic_sphinx/layout.html index 6180d30f05..dd99163ece 100644 --- a/docs/source/semantic_sphinx/layout.html +++ b/docs/source/semantic_sphinx/layout.html @@ -76,7 +76,7 @@

Probabilistic Programming in Python

- + From 01fd6d992c493b1822561cea47abd8caf9480449 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Mon, 19 Aug 2019 16:26:35 -0400 Subject: [PATCH 2/7] Adding adaptation for dense mass matrix --- pymc3/step_methods/hmc/quadpotential.py | 160 +++++++++++++++++++++++- pymc3/tests/test_quadpotential.py | 61 +++++++++ 2 files changed, 220 insertions(+), 1 deletion(-) diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index 9ad1834ae2..1bb97b330e 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -8,7 +8,8 @@ __all__ = ['quad_potential', 'QuadPotentialDiag', 'QuadPotentialFull', - 'QuadPotentialFullInv', 'QuadPotentialDiagAdapt', 'isquadpotential'] + 'QuadPotentialFullInv', 'QuadPotentialDiagAdapt', + 'QuadPotentialFullAdapt', 'isquadpotential'] def quad_potential(C, is_cov): @@ -453,6 +454,163 @@ def velocity_energy(self, x, v_out): __call__ = random +class QuadPotentialFullAdapt(QuadPotential): + """Adapt a dense mass matrix using the sample covariances + + If the parameter ``doubling`` is true, the adaptation window is doubled + every time it is passed. This can lead to better convergence of the mass + matrix estimation. + + """ + def __init__( + self, + n, + initial_mean, + initial_cov=None, + initial_weight=0, + adaptation_window=101, + doubling=True, + dtype=None, + ): + if initial_cov is not None and initial_cov.ndim != 2: + raise ValueError("Initial covariance must be two-dimensional.") + if initial_mean.ndim != 1: + raise ValueError("Initial mean must be one-dimensional.") + if initial_cov is not None and initial_cov.shape != (n, n): + raise ValueError( + "Wrong shape for initial_cov: expected %s got %s" + % (n, initial_cov.shape) + ) + if len(initial_mean) != n: + raise ValueError( + "Wrong shape for initial_mean: expected %s got %s" + % (n, len(initial_mean)) + ) + + if dtype is None: + dtype = theano.config.floatX + + if initial_cov is None: + initial_cov = np.eye(n, dtype=dtype) + initial_weight = 1 + + self.dtype = dtype + self._n = n + self._cov = np.array(initial_cov, dtype=self.dtype, copy=True) + self._cov_theano = theano.shared(self._cov) + self._chol = scipy.linalg.cholesky(self._cov, lower=True) + self._chol_error = None + self._foreground_cov = _WeightedCovariance( + self._n, initial_mean, initial_cov, initial_weight, self.dtype + ) + self._background_cov = _WeightedCovariance(self._n, dtype=self.dtype) + self._n_samples = 0 + + self._doubling = doubling + self._adaptation_window = int(adaptation_window) + self._previous_update = 0 + + def velocity(self, x, out=None): + return np.dot(self._cov, x, out=out) + + def energy(self, x, velocity=None): + if velocity is None: + velocity = self.velocity(x) + return 0.5 * np.dot(x, velocity) + + def velocity_energy(self, x, v_out): + self.velocity(x, out=v_out) + return self.energy(x, v_out) + + def random(self): + vals = np.random.normal(size=self._n).astype(self.dtype) + return scipy.linalg.solve_triangular(self._chol.T, vals, + overwrite_b=True) + + def _update_from_weightvar(self, weightvar): + weightvar.current_covariance(out=self._cov) + try: + self._chol = scipy.linalg.cholesky(self._cov, lower=True) + except scipy.linalg.LinAlgError as error: + self._chol_error = error + self._cov_theano.set_value(self._cov) + + def update(self, sample, grad, tune): + if not tune: + return + + self._foreground_cov.add_sample(sample, weight=1) + self._background_cov.add_sample(sample, weight=1) + self._update_from_weightvar(self._foreground_cov) + + # Steps since previous update + delta = self._n_samples - self._previous_update + if delta >= self._adaptation_window: + self._foreground_cov = self._background_cov + self._background_cov = _WeightedCovariance( + self._n, dtype=self.dtype + ) + + self._previous_update = self._n_samples + if self._doubling: + self._adaptation_window *= 2 + + self._n_samples += 1 + + def raise_ok(self, vmap): + if self._chol_error is not None: + raise ValueError("{0}".format(self._chol_error)) + + +class _WeightedCovariance: + """Online algorithm for computing mean and covariance.""" + + def __init__( + self, + nelem, + initial_mean=None, + initial_covariance=None, + initial_weight=0, + dtype="d", + ): + self._dtype = dtype + self.n_samples = float(initial_weight) + if initial_mean is None: + self.mean = np.zeros(nelem, dtype="d") + else: + self.mean = np.array(initial_mean, dtype="d", copy=True) + if initial_covariance is None: + self.raw_cov = np.eye(nelem, dtype="d") + else: + self.raw_cov = np.array(initial_covariance, dtype="d", copy=True) + + self.raw_cov[:] *= self.n_samples + + if self.raw_cov.shape != (nelem, nelem): + raise ValueError("Invalid shape for initial covariance.") + if self.mean.shape != (nelem,): + raise ValueError("Invalid shape for initial mean.") + + def add_sample(self, x, weight): + x = np.asarray(x) + self.n_samples += 1 + old_diff = x - self.mean + self.mean[:] += old_diff / self.n_samples + new_diff = x - self.mean + self.raw_cov[:] += weight * new_diff[:, None] * old_diff[None, :] + + def current_covariance(self, out=None): + if self.n_samples == 0: + raise ValueError("Can not compute covariance without samples.") + if out is not None: + return np.divide(self.raw_cov, self.n_samples - 1, out=out) + else: + return (self.raw_cov / (self.n_samples - 1)).astype(self._dtype) + + def current_mean(self): + return np.array(self.mean, dtype=self._dtype) + + try: import sksparse.cholmod as cholmod chol_available = True diff --git a/pymc3/tests/test_quadpotential.py b/pymc3/tests/test_quadpotential.py index 0b41c7547f..92912d76d8 100644 --- a/pymc3/tests/test_quadpotential.py +++ b/pymc3/tests/test_quadpotential.py @@ -137,3 +137,64 @@ def energy(self, x, velocity=None): step = pymc3.NUTS(potential=pot) pymc3.sample(10, init=None, step=step, chains=1) assert called + + +def test_weighted_covariance(ndim=10, seed=5432): + np.random.seed(seed) + + L = np.random.randn(ndim, ndim) + L[np.triu_indices_from(L, 1)] = 0.0 + L[np.diag_indices_from(L)] = np.exp(L[np.diag_indices_from(L)]) + cov = np.dot(L, L.T) + mean = np.random.randn(ndim) + + samples = np.random.multivariate_normal(mean, cov, size=100) + mu_est0 = np.mean(samples, axis=0) + cov_est0 = np.cov(samples, rowvar=0) + + est = quadpotential._WeightedCovariance(ndim) + for sample in samples: + est.add_sample(sample, 1) + mu_est = est.current_mean() + cov_est = est.current_covariance() + + assert np.allclose(mu_est, mu_est0) + assert np.allclose(cov_est, cov_est0) + + # Make sure that the weighted estimate also works + est2 = quadpotential._WeightedCovariance( + ndim, + np.mean(samples[:10], axis=0), + np.cov(samples[:10], rowvar=0, bias=True), + 10, + ) + for sample in samples[10:]: + est2.add_sample(sample, 1) + mu_est2 = est2.current_mean() + cov_est2 = est2.current_covariance() + + assert np.allclose(mu_est2, mu_est0) + assert np.allclose(cov_est2, cov_est0) + + +def test_full_adapt_sample_p(seed=4566): + # ref: https://github.com/stan-dev/stan/pull/2672 + np.random.seed(seed) + m = np.array([[3.0, -2.0], [-2.0, 4.0]]) + m_inv = np.linalg.inv(m) + + var = np.array( + [ + [2 * m[0, 0], m[1, 0] * m[1, 0] + m[1, 1] * m[0, 0]], + [m[0, 1] * m[0, 1] + m[1, 1] * m[0, 0], 2 * m[1, 1]], + ] + ) + + n_samples = 1000 + pot = quadpotential.QuadPotentialFullAdapt(2, np.zeros(2), m_inv, 1) + samples = [pot.random() for n in range(n_samples)] + sample_cov = np.cov(samples, rowvar=0) + + # Covariance matrix within 5 sigma of expected value + # (comes from a Wishart distribution) + assert np.all(np.abs(m - sample_cov) < 5 * np.sqrt(var / n_samples)) From 2af982060ed673cee967a60120d0d7c82a57bd1a Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Mon, 19 Aug 2019 16:33:40 -0400 Subject: [PATCH 3/7] Moving shared code to QuadPotentialFull base --- pymc3/step_methods/hmc/quadpotential.py | 37 ++++++++----------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index 1bb97b330e..191a84cbd3 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -417,7 +417,7 @@ def velocity_energy(self, x, v_out): class QuadPotentialFull(QuadPotential): """Basic QuadPotential object for Hamiltonian calculations.""" - def __init__(self, A, dtype=None): + def __init__(self, cov, dtype=None): """Compute the lower cholesky decomposition of the potential. Parameters @@ -428,33 +428,35 @@ def __init__(self, A, dtype=None): if dtype is None: dtype = theano.config.floatX self.dtype = dtype - self.A = A.astype(self.dtype) - self.L = scipy.linalg.cholesky(A, lower=True) + self._cov = np.array(cov, dtype=self.dtype, copy=True) + self._chol = scipy.linalg.cholesky(self._cov, lower=True) + self._n = len(self._cov) def velocity(self, x, out=None): """Compute the current velocity at a position in parameter space.""" - return np.dot(self.A, x, out=out) + return np.dot(self._cov, x, out=out) def random(self): """Draw random value from QuadPotential.""" - n = floatX(normal(size=self.L.shape[0])) - return scipy.linalg.solve_triangular(self.L.T, n) + vals = np.random.normal(size=self._n).astype(self.dtype) + return scipy.linalg.solve_triangular(self._chol.T, vals, + overwrite_b=True) def energy(self, x, velocity=None): """Compute kinetic energy at a position in parameter space.""" if velocity is None: velocity = self.velocity(x) - return .5 * x.dot(velocity) + return 0.5 * np.dot(x, velocity) def velocity_energy(self, x, v_out): """Compute velocity and return kinetic energy at a position in parameter space.""" self.velocity(x, out=v_out) - return 0.5 * np.dot(x, v_out) + return self.energy(x, v_out) __call__ = random -class QuadPotentialFullAdapt(QuadPotential): +class QuadPotentialFullAdapt(QuadPotentialFull): """Adapt a dense mass matrix using the sample covariances If the parameter ``doubling`` is true, the adaptation window is doubled @@ -510,23 +512,6 @@ def __init__( self._adaptation_window = int(adaptation_window) self._previous_update = 0 - def velocity(self, x, out=None): - return np.dot(self._cov, x, out=out) - - def energy(self, x, velocity=None): - if velocity is None: - velocity = self.velocity(x) - return 0.5 * np.dot(x, velocity) - - def velocity_energy(self, x, v_out): - self.velocity(x, out=v_out) - return self.energy(x, v_out) - - def random(self): - vals = np.random.normal(size=self._n).astype(self.dtype) - return scipy.linalg.solve_triangular(self._chol.T, vals, - overwrite_b=True) - def _update_from_weightvar(self, weightvar): weightvar.current_covariance(out=self._cov) try: From ad20b57007a64eaf127fac9e2465131d56cf0368 Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 20 Aug 2019 14:02:43 -0400 Subject: [PATCH 4/7] adding references to Welford's algorithm --- pymc3/step_methods/hmc/quadpotential.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index 191a84cbd3..1c7a098ae6 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -548,7 +548,14 @@ def raise_ok(self, vmap): class _WeightedCovariance: - """Online algorithm for computing mean and covariance.""" + """Online algorithm for computing mean and covariance + + This implements the `Welford's algorithm + `_ based + on the implementation in `the Stan math library + `_. + + """ def __init__( self, From db39838ba0a80fbe3c628d9f393ef9c023138fff Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 3 Dec 2019 10:33:57 -0800 Subject: [PATCH 5/7] adding a few more tests for full_adapt --- pymc3/step_methods/hmc/quadpotential.py | 21 +++++--- pymc3/tests/test_quadpotential.py | 70 +++++++++++++++++++++---- 2 files changed, 76 insertions(+), 15 deletions(-) diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index 1c7a098ae6..876d7a17c9 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -1,3 +1,4 @@ +import warnings import numpy as np from numpy.random import normal import scipy.linalg @@ -471,9 +472,12 @@ def __init__( initial_cov=None, initial_weight=0, adaptation_window=101, + update_window=1, doubling=True, dtype=None, ): + warnings.warn("QuadPotentialFullAdapt is an experimental feature") + if initial_cov is not None and initial_cov.ndim != 2: raise ValueError("Initial covariance must be two-dimensional.") if initial_mean.ndim != 1: @@ -499,7 +503,6 @@ def __init__( self.dtype = dtype self._n = n self._cov = np.array(initial_cov, dtype=self.dtype, copy=True) - self._cov_theano = theano.shared(self._cov) self._chol = scipy.linalg.cholesky(self._cov, lower=True) self._chol_error = None self._foreground_cov = _WeightedCovariance( @@ -510,26 +513,32 @@ def __init__( self._doubling = doubling self._adaptation_window = int(adaptation_window) + self._update_window = int(update_window) self._previous_update = 0 def _update_from_weightvar(self, weightvar): weightvar.current_covariance(out=self._cov) try: self._chol = scipy.linalg.cholesky(self._cov, lower=True) - except scipy.linalg.LinAlgError as error: + except (scipy.linalg.LinAlgError, ValueError) as error: self._chol_error = error - self._cov_theano.set_value(self._cov) def update(self, sample, grad, tune): if not tune: return + # Steps since previous update + delta = self._n_samples - self._previous_update + self._foreground_cov.add_sample(sample, weight=1) self._background_cov.add_sample(sample, weight=1) - self._update_from_weightvar(self._foreground_cov) - # Steps since previous update - delta = self._n_samples - self._previous_update + # Update the covariance matrix and recompute the Cholesky factorization + # every "update_window" steps + if (delta + 1) % self._update_window == 0: + self._update_from_weightvar(self._foreground_cov) + + # Reset the background model if the if delta >= self._adaptation_window: self._foreground_cov = self._background_cov self._background_cov = _WeightedCovariance( diff --git a/pymc3/tests/test_quadpotential.py b/pymc3/tests/test_quadpotential.py index 92912d76d8..1021151dd2 100644 --- a/pymc3/tests/test_quadpotential.py +++ b/pymc3/tests/test_quadpotential.py @@ -38,16 +38,16 @@ def test_equal_diag(): x = floatX(np.random.randn(5)) pots = [ quadpotential.quad_potential(diag, False), - quadpotential.quad_potential(1. / diag, True), + quadpotential.quad_potential(1.0 / diag, True), quadpotential.quad_potential(np.diag(diag), False), - quadpotential.quad_potential(np.diag(1. / diag), True), + quadpotential.quad_potential(np.diag(1.0 / diag), True), ] if quadpotential.chol_available: - diag_ = scipy.sparse.csc_matrix(np.diag(1. / diag)) + diag_ = scipy.sparse.csc_matrix(np.diag(1.0 / diag)) pots.append(quadpotential.quad_potential(diag_, True)) - v = np.diag(1. / diag).dot(x) - e = x.dot(np.diag(1. / diag).dot(x)) / 2 + v = np.diag(1.0 / diag).dot(x) + e = x.dot(np.diag(1.0 / diag).dot(x)) / 2 for pot in pots: v_ = pot.velocity(x) e_ = pot.energy(x) @@ -85,9 +85,9 @@ def test_random_diag(): np.random.seed(42) pots = [ quadpotential.quad_potential(d, True), - quadpotential.quad_potential(1./d, False), + quadpotential.quad_potential(1.0 / d, False), quadpotential.quad_potential(np.diag(d), True), - quadpotential.quad_potential(np.diag(1./d), False), + quadpotential.quad_potential(np.diag(1.0 / d), False), ] if quadpotential.chol_available: d_ = scipy.sparse.csc_matrix(np.diag(d)) @@ -95,7 +95,7 @@ def test_random_diag(): pots.append(pot) for pot in pots: vals = np.array([pot.random() for _ in range(1000)]) - npt.assert_allclose(vals.std(0), np.sqrt(1./d), atol=0.1) + npt.assert_allclose(vals.std(0), np.sqrt(1.0 / d), atol=0.1) def test_random_dense(): @@ -112,7 +112,9 @@ def test_random_dense(): quadpotential.QuadPotentialFullInv(inv), ] if quadpotential.chol_available: - pot = quadpotential.QuadPotential_Sparse(scipy.sparse.csc_matrix(cov)) + pot = quadpotential.QuadPotential_Sparse( + scipy.sparse.csc_matrix(cov) + ) pots.append(pot) for pot in pots: cov_ = np.cov(np.array([pot.random() for _ in range(1000)]).T) @@ -198,3 +200,53 @@ def test_full_adapt_sample_p(seed=4566): # Covariance matrix within 5 sigma of expected value # (comes from a Wishart distribution) assert np.all(np.abs(m - sample_cov) < 5 * np.sqrt(var / n_samples)) + + +def test_full_adapt_update_window(seed=1123): + np.random.seed(seed) + init_cov = np.array([[1.0, 0.02], [0.02, 0.8]]) + pot = quadpotential.QuadPotentialFullAdapt( + 2, np.zeros(2), init_cov, 1, update_window=50 + ) + assert np.allclose(pot._cov, init_cov) + for i in range(49): + pot.update(np.random.randn(2), None, True) + assert np.allclose(pot._cov, init_cov) + pot.update(np.random.randn(2), None, True) + assert not np.allclose(pot._cov, init_cov) + + +def test_full_adapt_adaptation_window(seed=8978): + np.random.seed(seed) + window = 10 + pot = quadpotential.QuadPotentialFullAdapt( + 2, np.zeros(2), np.eye(2), 1, adaptation_window=window + ) + for i in range(window + 1): + pot.update(np.random.randn(2), None, True) + assert pot._previous_update == window + assert pot._adaptation_window == window * 2 + + pot = quadpotential.QuadPotentialFullAdapt( + 2, np.zeros(2), np.eye(2), 1, adaptation_window=window, doubling=False + ) + for i in range(window + 1): + pot.update(np.random.randn(2), None, True) + assert pot._previous_update == window + assert pot._adaptation_window == window + + +def test_full_adapt_not_invertible(): + window = 10 + pot = quadpotential.QuadPotentialFullAdapt( + 2, np.zeros(2), np.eye(2), 0, adaptation_window=window + ) + for i in range(window + 1): + pot.update(np.ones(2), None, True) + with pytest.raises(ValueError): + pot.raise_ok(None) + + +def test_full_adapt_warn(): + with pytest.warns(UserWarning): + quadpotential.QuadPotentialFullAdapt(2, np.zeros(2), np.eye(2), 0) From 225a3944aed504644945e4e03b4edb91247c954c Mon Sep 17 00:00:00 2001 From: Dan F-M Date: Tue, 3 Dec 2019 10:41:44 -0800 Subject: [PATCH 6/7] add test for sampling using QuadPotentialFullAdapt --- pymc3/tests/test_quadpotential.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pymc3/tests/test_quadpotential.py b/pymc3/tests/test_quadpotential.py index 1021151dd2..a22ece3f09 100644 --- a/pymc3/tests/test_quadpotential.py +++ b/pymc3/tests/test_quadpotential.py @@ -250,3 +250,22 @@ def test_full_adapt_not_invertible(): def test_full_adapt_warn(): with pytest.warns(UserWarning): quadpotential.QuadPotentialFullAdapt(2, np.zeros(2), np.eye(2), 0) + + +def test_full_adapt_sampling(seed=289586): + np.random.seed(seed) + + L = np.random.randn(5, 5) + L[np.diag_indices_from(L)] = np.exp(L[np.diag_indices_from(L)]) + L[np.triu_indices_from(L, 1)] = 0.0 + + with pymc3.Model() as model: + pymc3.MvNormal("a", mu=np.zeros(len(L)), chol=L, shape=len(L)) + + pot = quadpotential.QuadPotentialFullAdapt( + model.ndim, np.zeros(model.ndim) + ) + step = pymc3.NUTS(model=model, potential=pot) + pymc3.sample( + draws=10, tune=1000, random_seed=seed, step=step, cores=1, chains=1 + ) From d97c3304447fc60a0609914340830c84e925f863 Mon Sep 17 00:00:00 2001 From: Dan Foreman-Mackey Date: Tue, 3 Dec 2019 17:03:40 -0800 Subject: [PATCH 7/7] Update pymc3/step_methods/hmc/quadpotential.py Co-Authored-By: George Ho <19851673+eigenfoo@users.noreply.github.com> --- pymc3/step_methods/hmc/quadpotential.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index 876d7a17c9..65ae08d969 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -538,7 +538,7 @@ def update(self, sample, grad, tune): if (delta + 1) % self._update_window == 0: self._update_from_weightvar(self._foreground_cov) - # Reset the background model if the + # Reset the background covariance if we are at the end of the adaptation window. if delta >= self._adaptation_window: self._foreground_cov = self._background_cov self._background_cov = _WeightedCovariance(