add stein methods #2183

Merged
merged 19 commits on May 22, 2017
58 changes: 39 additions & 19 deletions pymc3/tests/test_variational_inference.py
@@ -7,11 +7,10 @@
from pymc3 import Model, Normal
from pymc3.variational import (
ADVI, FullRankADVI, SVGD,
Empirical,
fit
Empirical, ASVGD,
MeanField, fit
)
from pymc3.variational.operators import KL
from pymc3.variational.approximations import MeanField

from pymc3.tests import models
from pymc3.tests.helpers import SeededTest
@@ -70,7 +69,15 @@ class TestApproximates:
class Base(SeededTest):
inference = None
NITER = 12000
optimizer = functools.partial(pm.adam, learning_rate=.01)
optimizer = pm.adagrad_window(learning_rate=0.01)
conv_cb = property(lambda self: [
pm.callbacks.CheckParametersConvergence(
every=500,
diff='relative', tolerance=0.001),
pm.callbacks.CheckParametersConvergence(
every=500,
diff='absolute', tolerance=0.0001)
])

def test_vars_view(self):
_, model, _ = models.multidimensional_model()
@@ -147,11 +154,10 @@ def test_optimizer_with_full_data(self):
inf.fit(10)
approx = inf.fit(self.NITER,
obj_optimizer=self.optimizer,
callbacks=
[pm.callbacks.CheckParametersConvergence()],)
callbacks=self.conv_cb,)
trace = approx.sample(10000)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.05)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.1)

def test_optimizer_minibatch_with_generator(self):
n = 1000
@@ -176,11 +182,10 @@ def create_minibatch(data):
Normal('x', mu=mu_, sd=sd, observed=minibatches, total_size=n)
inf = self.inference()
approx = inf.fit(self.NITER * 3, obj_optimizer=self.optimizer,
callbacks=
[pm.callbacks.CheckParametersConvergence()])
callbacks=self.conv_cb)
trace = approx.sample(10000)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.05)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.1)

def test_optimizer_minibatch_with_callback(self):
n = 1000
@@ -208,12 +213,21 @@ def cb(*_):
mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
Normal('x', mu=mu_, sd=sd, observed=data_t, total_size=n)
inf = self.inference(scale_cost_to_minibatch=True)
approx = inf.fit(self.NITER * 3, callbacks=
[cb, pm.callbacks.CheckParametersConvergence()],
obj_n_mc=10, obj_optimizer=self.optimizer)
approx = inf.fit(
self.NITER * 3, callbacks=[cb] + self.conv_cb, obj_optimizer=self.optimizer)
trace = approx.sample(10000)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.4)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.05)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.1)

def test_n_obj_mc(self):
n_samples = 100
xs = np.random.binomial(n=1, p=0.2, size=n_samples)
with pm.Model():
p = pm.Beta('p', alpha=1, beta=1)
pm.Binomial('xs', n=1, p=p, observed=xs)
inf = self.inference(scale_cost_to_minibatch=True)
# should just work
inf.fit(10, obj_n_mc=10, obj_optimizer=self.optimizer)

def test_pickling(self):
with models.multidimensional_model()[1]:
@@ -277,8 +291,14 @@ def test_from_advi(self):

class TestSVGD(TestApproximates.Base):
inference = functools.partial(SVGD, n_particles=100)
NITER = 2500
optimizer = functools.partial(pm.adam, learning_rate=.1)


class TestASVGD(TestApproximates.Base):
NITER = 15000
inference = ASVGD
test_aevb = _test_aevb
optimizer = pm.adagrad_window(learning_rate=0.002)
conv_cb = []


class TestEmpirical(SeededTest):
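Taken together, the revised test base runs every inference class with the new adagrad_window optimizer and a pair of CheckParametersConvergence callbacks (relative and absolute). A minimal standalone sketch of that setup (the toy model and iteration count are illustrative, not taken from this PR):

import numpy as np
import pymc3 as pm
from pymc3.variational import SVGD

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=1)
    pm.Normal('x', mu=mu, sd=1, observed=np.random.randn(100))
    inference = SVGD(n_particles=100)   # as in TestSVGD above
    approx = inference.fit(
        2500,
        obj_optimizer=pm.adagrad_window(learning_rate=0.01),
        callbacks=[
            pm.callbacks.CheckParametersConvergence(
                every=500, diff='relative', tolerance=0.001),
            pm.callbacks.CheckParametersConvergence(
                every=500, diff='absolute', tolerance=0.0001),
        ])
    trace = approx.sample(10000)        # draw from the fitted approximation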
22 changes: 14 additions & 8 deletions pymc3/variational/__init__.py
@@ -1,39 +1,45 @@
from .advi import advi, sample_vp
from .advi_minibatch import advi_minibatch

# commonly used
from . import updates
from .updates import (
sgd,
apply_momentum,
momentum,
apply_nesterov_momentum,
adagrad_window,
nesterov_momentum,
adagrad,
adagrad_window,
rmsprop,
adadelta,
adam,
adamax,
norm_constraint,
total_norm_constraint,
total_norm_constraint
)

from . import inference
from .inference import (
ADVI,
FullRankADVI,
SVGD,
fit,
ASVGD,
Inference,
fit
)

from . import approximations
from .approximations import (
Empirical,
FullRank,
MeanField,
FullRank,
Empirical,
sample_approx
)

from . import approximations
# special
from .stein import Stein
from . import operators
from . import test_functions
from . import opvi
from . import updates
from . import inference
from . import callbacks
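After this reordering the optimizers, the inference classes and the fit shortcut are all importable straight from pymc3.variational. A usage sketch (not code from this PR; the commented functional call assumes fit's default ADVI method):

import numpy as np
import pymc3 as pm
from pymc3.variational import ADVI, adagrad_window, fit

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=1)
    pm.Normal('y', mu=mu, sd=1, observed=np.random.randn(50))
    # class-based interface
    approx = ADVI().fit(10000, obj_optimizer=adagrad_window(learning_rate=0.01))
    # functional shortcut (equivalent for the default method)
    # approx = fit(10000, method='advi')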
56 changes: 31 additions & 25 deletions pymc3/variational/approximations.py
@@ -28,7 +28,7 @@ class MeanField(Approximation):
mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)}
Local Vars are used for Autoencoding Variational Bayes
See (AEVB; Kingma and Welling, 2014) for details
model : :class:`pymc3.Model`
model : :class:`pymc3.Model`
PyMC3 model for inference
start : `Point`
initial mean
@@ -38,14 +38,14 @@
1 at the start and 0 in the end. So slow decay will be ok.
See (Sticking the Landing; Geoffrey Roeder,
Yuhuai Wu, David Duvenaud, 2016) for details
scale_cost_to_minibatch : `bool`
scale_cost_to_minibatch : `bool`
Scale cost to minibatch instead of full dataset, default False
random_seed : None or int
leave None to use package global RandomStream or other
valid value to create instance specific one

References
----------
----------
- Geoffrey Roeder, Yuhuai Wu, David Duvenaud, 2016
Sticking the Landing: A Simple Reduced-Variance Gradient for ADVI
approximateinference.org/accepted/RoederEtAl2016.pdf
@@ -76,12 +76,9 @@ def create_shared_params(self, **kwargs):
start = start_
start = self.gbij.map(start)
return {'mu': theano.shared(
pm.floatX(start),
'mu'),
pm.floatX(start), 'mu'),
'rho': theano.shared(
np.zeros((self.global_size,), dtype=theano.config.floatX),
'rho')
}
pm.floatX(np.zeros((self.global_size,))), 'rho')}

def log_q_W_global(self, z):
"""
@@ -120,13 +117,13 @@ class FullRank(Approximation):
achieving better convergence properties. Common schedule is
1 at the start and 0 in the end. So slow decay will be ok.
See (Sticking the Landing; Geoffrey Roeder,
Yuhuai Wu, David Duvenaud, 2016) for details
Yuhuai Wu, David Duvenaud, 2016) for details
scale_cost_to_minibatch : bool, default False
Scale cost to minibatch instead of full dataset
random_seed : None or int
leave None to use package global RandomStream or other
valid value to create instance specific one

Other Parameters
----------------
gpu_compat : bool
@@ -138,6 +135,7 @@
Sticking the Landing: A Simple Reduced-Variance Gradient for ADVI
approximateinference.org/accepted/RoederEtAl2016.pdf
"""

def __init__(self, local_rv=None, model=None, cost_part_grad_scale=1,
scale_cost_to_minibatch=False,
gpu_compat=False, random_seed=None, **kwargs):
@@ -173,7 +171,9 @@ def tril_index_matrix(self):
num_tril_entries = self.num_tril_entries
tril_index_matrix = np.zeros([n, n], dtype=int)
tril_index_matrix[np.tril_indices(n)] = np.arange(num_tril_entries)
tril_index_matrix[np.tril_indices(n)[::-1]] = np.arange(num_tril_entries)
tril_index_matrix[
np.tril_indices(n)[::-1]
] = np.arange(num_tril_entries)
return tril_index_matrix
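For a 3-dimensional posterior, for example, this yields a symmetric matrix of positions into the packed lower-triangular parameter vector (a worked illustration, not part of the diff):

import numpy as np

n = 3
num_tril_entries = n * (n + 1) // 2                         # 6 packed entries
M = np.zeros((n, n), dtype=int)
M[np.tril_indices(n)] = np.arange(num_tril_entries)         # fill the lower triangle
M[np.tril_indices(n)[::-1]] = np.arange(num_tril_entries)   # mirror into the upper triangle
# M == [[0, 1, 3],
#       [1, 2, 4],
#       [3, 4, 5]]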

def create_shared_params(self, **kwargs):
@@ -184,14 +184,14 @@ def create_shared_params(self, **kwargs):
start_ = self.model.test_point.copy()
pm.sampling._update_start_vals(start_, start, self.model)
start = start_
start = self.gbij.map(start)
start = pm.floatX(self.gbij.map(start))
n = self.global_size
L_tril = (
np.eye(n)
[np.tril_indices(n)]
.astype(theano.config.floatX)
)
return {'mu': theano.shared(pm.floatX(start), 'mu'),
return {'mu': theano.shared(start, 'mu'),
'L_tril': theano.shared(L_tril, 'L_tril')
}

@@ -220,7 +220,7 @@ def from_mean_field(cls, mean_field, gpu_compat=False):
"""Construct FullRank from MeanField approximation

Parameters
----------
----------
mean_field : :class:`MeanField`
approximation to start with

@@ -264,9 +264,9 @@ class Empirical(Approximation):
mapping {model_variable -> local_variable (:math:`\\mu`, :math:`\\rho`)}
Local Vars are used for Autoencoding Variational Bayes
See (AEVB; Kingma and Welling, 2014) for details
scale_cost_to_minibatch : `bool`
scale_cost_to_minibatch : `bool`
Scale cost to minibatch instead of full dataset, default False
model : :class:`pymc3.Model`
model : :class:`pymc3.Model`
PyMC3 model for inference
random_seed : None or int
leave None to use package global RandomStream or other
@@ -279,6 +279,7 @@
... trace = sample(1000, step=step)
... histogram = Empirical(trace[100:])
"""

def __init__(self, trace, local_rv=None,
scale_cost_to_minibatch=False,
model=None, random_seed=None, **kwargs):
@@ -368,15 +369,15 @@ def from_noise(cls, size, jitter=.01, local_rv=None,

Parameters
----------
size : `int`
size : `int`
number of initial particles
jitter : `float`
jitter : `float`
initial sd
local_rv : `dict`
mapping {model_variable -> local_variable}
Local Vars are used for Autoencoding Variational Bayes
See (AEVB; Kingma and Welling, 2014) for details
start : `Point`
start : `Point`
initial point
model : :class:`pymc3.Model`
PyMC3 model for inference
@@ -386,20 +387,25 @@ kwargs : other kwargs passed to init
kwargs : other kwargs passed to init

Returns
-------
-------
:class:`Empirical`
"""
hist = cls(None, local_rv=local_rv, model=model, random_seed=random_seed, **kwargs)
hist = cls(
None,
local_rv=local_rv,
model=model,
random_seed=random_seed,
**kwargs)
if start is None:
start = hist.model.test_point
else:
start_ = hist.model.test_point.copy()
pm.sampling._update_start_vals(start_, start, hist.model)
start = start_
start = hist.gbij.map(start)
start = pm.floatX(hist.gbij.map(start))
# Initialize particles
x0 = np.tile(start, (size, 1))
x0 += np.random.normal(0, jitter, x0.shape)
x0 += pm.floatX(np.random.normal(0, jitter, x0.shape))
hist.histogram.set_value(x0)
return hist
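A typical use is to create an initial particle cloud around the model test point, for instance as a starting approximation for the Stein-type methods added in this PR. A brief sketch with illustrative sizes:

import pymc3 as pm
from pymc3.variational import Empirical

with pm.Model():
    pm.Normal('mu', mu=0, sd=1)
    # 100 particles jittered around the test point with sd 0.1
    particles = Empirical.from_noise(size=100, jitter=0.1)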

Expand All @@ -408,7 +414,7 @@ def sample_approx(approx, draws=100, include_transformed=True):
"""Draw samples from variational posterior.

Parameters
----------
----------
approx : :class:`Approximation`
Approximation to sample from
draws : `int`
@@ -417,7 +423,7 @@ If True, transformed variables are also sampled. Default is True.
If True, transformed variables are also sampled. Default is True.

Returns
-------
-------
trace : class:`pymc3.backends.base.MultiTrace`
Samples drawn from variational posterior.
"""
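For instance, an approximation returned by any of the inference classes can be converted back into an ordinary trace (a sketch; the ADVI call mirrors the tests in this PR):

import pymc3 as pm
from pymc3.variational import ADVI, sample_approx

with pm.Model():
    pm.Normal('mu', mu=0, sd=1)
    approx = ADVI().fit(10000)

trace = sample_approx(approx, draws=500)   # a plain MultiTrace, usable with pm.summary etc.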
8 changes: 5 additions & 3 deletions pymc3/variational/callbacks.py
@@ -12,12 +12,13 @@ def __call__(self, approx, loss, i):


def relative(current, prev, eps=1e-6):
return (np.abs(current - prev)+eps)/(np.abs(prev)+eps)
return (np.abs(current - prev) + eps) / (np.abs(prev) + eps)


def absolute(current, prev):
return np.abs(current - prev)


_diff = dict(
relative=relative,
absolute=absolute
@@ -32,7 +33,7 @@ class CheckParametersConvergence(Callback):
every : int
check frequency
tolerance : float
if diff norm < tolerance : break
if diff norm < tolerance : break
diff : str
difference type one of {'absolute', 'relative'}
ord : {non-zero int, inf, -inf, 'fro', 'nuc'}, optional
@@ -50,7 +51,8 @@ class CheckParametersConvergence(Callback):
... )
"""

def __init__(self, every=100, tolerance=1e-3, diff='relative', ord=np.inf):
def __init__(self, every=100, tolerance=1e-3,
diff='relative', ord=np.inf):
self._diff = _diff[diff]
self.ord = ord
self.every = every
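In short, every `every` iterations the callback applies the selected diff function to the flattened parameter values and stops the fit once the chosen norm of that difference drops below `tolerance`. A simplified sketch of the check (not the actual implementation):

import numpy as np

def has_converged(current, prev, diff='relative', ord=np.inf, tolerance=1e-3):
    # current/prev are flattened parameter vectors from successive checks;
    # _diff maps 'relative'/'absolute' to the helper functions defined above
    delta = _diff[diff](current, prev)
    return np.linalg.norm(delta, ord) < tolerance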