
Commit bb574a7

Add DEMetropolisZ stepper (#3784)
* implement DEMetropolisZ
* use len instead of attribute
* introduce tune_drop_fraction, which improves efficiency in situations where initial tuning convergence is slow (see PR #3784)
* reset tuned settings in sequential single-process sampling
* tune DEMetropolisZ lambda by default
* use new empty list to reset history
* fix dual-use conflict of self.tune attribute
* add warning and initialize self.tune attribute
* add tests for DEMetropolisZ
* add tests to hit more of the diff
* add and move notebooks, update release notes
* address review feedback
* apply review suggestions
* more accurate wording

Co-authored-by: Thomas Wiecki <[email protected]>
1 parent: dfc9102 · commit: bb574a7

10 files changed: +5,063 -4 lines

Diff for: RELEASE-NOTES.md (+2)

@@ -5,6 +5,8 @@
 ### New features
 - use [fastprogress](https://github.com/fastai/fastprogress) instead of tqdm [#3693](https://github.com/pymc-devs/pymc3/pull/3693)
 - `DEMetropolis` can now tune both `lambda` and `scaling` parameters, but by default neither of them is tuned. See [#3743](https://github.com/pymc-devs/pymc3/pull/3743) for more info.
+- `DEMetropolisZ`, an improved variant of `DEMetropolis`, brings better parallelization and higher efficiency with fewer chains, at the cost of slower initial convergence. This implementation is experimental. See [#3784](https://github.com/pymc-devs/pymc3/pull/3784) for more info.
+- Notebooks that give insight into `DEMetropolis`, `DEMetropolisZ` and the `DifferentialEquation` interface are now located in the [Tutorials/Deep Dive](https://docs.pymc.io/nb_tutorials/index.html) section.

 ### Maintenance
 - Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6.
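
For orientation, the new stepper can be requested like any other PyMC3 step method. Below is a minimal usage sketch; the toy model is made up for illustration, while `DEMetropolisZ` and its `tune`/`tune_drop_fraction` keywords come from this commit:

```python
import pymc3 as pm

with pm.Model():
    x = pm.Normal('x', mu=0, sd=1, shape=10)
    # DEMetropolisZ tunes `lambda` by default; tune_drop_fraction controls
    # how much of the tuning history is discarded when tuning ends
    step = pm.DEMetropolisZ(tune='lambda', tune_drop_fraction=0.9)
    trace = pm.sample(tune=2000, draws=2000, step=step, chains=4, cores=4)
```

Because proposals are generated from each chain's own history rather than from the current states of parallel chains, the chains never need to communicate, which is what enables the better parallelization mentioned above.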

Diff for: docs/source/notebooks/DEMetropolisZ_EfficiencyComparison.ipynb (+2,459)

Large diffs are not rendered by default.

Diff for: docs/source/notebooks/DEMetropolisZ_tune_drop_fraction.ipynb (+2,284)

Large diffs are not rendered by default.

Diff for: docs/source/notebooks/table_of_contents_examples.js (-1)

@@ -56,6 +56,5 @@ Gallery.contents = {
     "GLM-hierarchical-advi-minibatch": "Variational Inference",
     "ODE_with_manual_gradients": "Inference in ODE models",
     "ODE_API_introduction": "Inference in ODE models",
-    "ODE_API_shapes_and_benchmarking": "Inference in ODE models",
     "probabilistic_matrix_factorization": "Case Studies"
 }

Diff for: docs/source/notebooks/table_of_contents_tutorials.js (+3)

@@ -10,6 +10,9 @@ Gallery.contents = {
     "Diagnosing_biased_Inference_with_Divergences": "Deep dives",
     "Advanced_usage_of_Theano_in_PyMC3.rst": "Deep dives",
     "getting_started": "Deep dives",
+    "ODE_API_shapes_and_benchmarking": "Deep dives",
+    "DEMetropolisZ_EfficiencyComparison": "Deep dives",
+    "DEMetropolisZ_tune_drop_fraction": "Deep dives",
     "PyMC3_tips_and_heuristic": "How-To",
     "blackbox_external_likelihood": "How-To",
     "profiling": "How-To",

Diff for: pymc3/sampling.py (+2)

@@ -875,6 +875,8 @@ def _iter_sample(

     try:
         step.tune = bool(tune)
+        if hasattr(step, 'reset_tuning'):
+            step.reset_tuning()
         for i in range(draws):
             stats = None

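As an aside, the hook added here is duck-typed: `_iter_sample` only calls `reset_tuning` when the step method provides one, so existing steppers without resettable state are unaffected. A minimal sketch of the pattern (the `LegacyStep` class and `start_chain` helper are hypothetical, for illustration only):

```python
class LegacyStep:
    """A stepper with no resettable tuning state."""
    tune = True

def start_chain(step, tune):
    # mirrors the two lines added to _iter_sample above
    step.tune = bool(tune)
    if hasattr(step, 'reset_tuning'):
        step.reset_tuning()  # skipped for LegacyStep

start_chain(LegacyStep(), tune=500)  # runs without error
```

This matters for sequential sampling: when several chains are drawn one after another with the same step instance, tuned parameters and the proposal history of one chain would otherwise leak into the next.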

Diff for: pymc3/step_methods/__init__.py (+1 -1)

@@ -17,7 +17,7 @@
 from .hmc import HamiltonianMC, NUTS

 from .metropolis import Metropolis
-from .metropolis import DEMetropolis
+from .metropolis import DEMetropolis, DEMetropolisZ
 from .metropolis import BinaryMetropolis
 from .metropolis import BinaryGibbsMetropolis
 from .metropolis import CategoricalGibbsMetropolis

Diff for: pymc3/step_methods/compound.py (+5)

@@ -67,6 +67,11 @@ def stop_tuning(self):
         for method in self.methods:
             method.stop_tuning()

+    def reset_tuning(self):
+        for method in self.methods:
+            if hasattr(method, 'reset_tuning'):
+                method.reset_tuning()
+
     @property
     def vars_shape_dtype(self):
         dtype_shapes = {}
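For context, a `CompoundStep` arises when different step methods handle different variables; the new method simply forwards the reset to every sub-stepper that supports it. A hedged sketch of how such a compound step comes about (toy model, illustrative only):

```python
import pymc3 as pm

with pm.Model():
    x = pm.Normal('x', mu=0, sd=1)
    c = pm.Bernoulli('c', p=0.5)
    # two steppers -> pymc3 wraps them in a CompoundStep; reset_tuning()
    # on the compound reaches the DEMetropolisZ but skips BinaryMetropolis
    steps = [pm.DEMetropolisZ([x]), pm.BinaryMetropolis([c])]
    trace = pm.sample(step=steps, chains=2, cores=1)
```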

Diff for: pymc3/step_methods/metropolis.py (+177 -2)

@@ -16,13 +16,14 @@
 import numpy.random as nr
 import theano
 import scipy.linalg
+import warnings

 from ..distributions import draw_values
 from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence
 import pymc3 as pm
 from pymc3.theanof import floatX

-__all__ = ['Metropolis', 'DEMetropolis', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
+__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
            'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal',
            'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal']

@@ -572,9 +573,10 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0

         self.scaling = np.atleast_1d(scaling).astype('d')
         if lamb is None:
+            # default to the optimal lambda for normally distributed targets
             lamb = 2.38 / np.sqrt(2 * model.ndim)
         self.lamb = float(lamb)
-        if not tune in {None, 'scaling', 'lambda'}:
+        if tune not in {None, 'scaling', 'lambda'}:
             raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
         self.tune = tune
         self.tune_interval = tune_interval

@@ -630,6 +632,179 @@ def competence(var, has_grad):
         return Competence.COMPATIBLE


+class DEMetropolisZ(ArrayStepShared):
+    """
+    Adaptive Differential Evolution Metropolis sampling step that uses the past to inform jumps.
+
+    Parameters
+    ----------
+    lamb : float
+        Lambda parameter of the DE proposal mechanism. Defaults to 2.38 / sqrt(2 * ndim)
+    vars : list
+        List of variables for sampler
+    S : standard deviation or covariance matrix
+        Some measure of variance to parameterize proposal distribution
+    proposal_dist : function
+        Function that returns zero-mean deviates when parameterized with
+        S (and n). Defaults to Uniform(-S,+S).
+    scaling : scalar or array
+        Initial scale factor for epsilon. Defaults to 0.001
+    tune : str
+        Which hyperparameter to tune. Defaults to 'lambda', but can also be 'scaling' or None.
+    tune_interval : int
+        The frequency of tuning. Defaults to 100 iterations.
+    tune_drop_fraction : float
+        Fraction of tuning steps that will be removed from the samplers history when the tuning ends.
+        Defaults to 0.9 - keeping the last 10% of tuning steps for good mixing while removing 90% of
+        potentially unconverged tuning positions.
+    model : PyMC Model
+        Optional model for sampling step. Defaults to None (taken from context).
+    mode : string or `Mode` instance.
+        compilation mode passed to Theano functions
+
+    References
+    ----------
+    .. [Braak2006] Cajo C.F. ter Braak (2006).
+        Differential Evolution Markov Chain with snooker updater and fewer chains.
+        Statistics and Computing
+        `link <https://doi.org/10.1007/s11222-008-9104-9>`__
+    """
+    name = 'DEMetropolisZ'
+
+    default_blocked = True
+    generates_stats = True
+    stats_dtypes = [{
+        'accept': np.float64,
+        'accepted': np.bool,
+        'tune': np.bool,
+        'scaling': np.float64,
+        'lambda': np.float64,
+    }]
+
+    def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
+                 tune='lambda', tune_interval=100, tune_drop_fraction: float = 0.9, model=None, mode=None, **kwargs):
+
+        warnings.warn(
+            'The DEMetropolisZ implementation in PyMC3 is very young. You should be extra critical about its results.'
+            ' See Pull Request #3784 for more information.'
+        )
+
+        model = pm.modelcontext(model)
+
+        if vars is None:
+            vars = model.cont_vars
+        vars = pm.inputvars(vars)
+
+        if S is None:
+            S = np.ones(model.ndim)
+
+        if proposal_dist is not None:
+            self.proposal_dist = proposal_dist(S)
+        else:
+            self.proposal_dist = UniformProposal(S)
+
+        self.scaling = np.atleast_1d(scaling).astype('d')
+        if lamb is None:
+            # default to the optimal lambda for normally distributed targets
+            lamb = 2.38 / np.sqrt(2 * model.ndim)
+        self.lamb = float(lamb)
+        if tune not in {None, 'scaling', 'lambda'}:
+            raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
+        self.tune = True
+        self.tune_target = tune
+        self.tune_interval = tune_interval
+        self.tune_drop_fraction = tune_drop_fraction
+        self.steps_until_tune = tune_interval
+        self.accepted = 0
+
+        # cache local history for the Z-proposals
+        self._history = []
+        # remember initial settings before tuning so they can be reset
+        self._untuned_settings = dict(
+            scaling=self.scaling,
+            lamb=self.lamb,
+            steps_until_tune=tune_interval,
+            accepted=self.accepted
+        )
+
+        self.mode = mode
+
+        shared = pm.make_shared_replacements(vars, model)
+        self.delta_logp = delta_logp(model.logpt, vars, shared)
+        super().__init__(vars, shared)
+
+    def reset_tuning(self):
+        """Resets the tuned sampler parameters and history to their initial values."""
+        # history can't be reset via the _untuned_settings dict because it's a list
+        self._history = []
+        for attr, initial_value in self._untuned_settings.items():
+            setattr(self, attr, initial_value)
+        return
+
+    def astep(self, q0):
+        # same tuning scheme as DEMetropolis
+        if not self.steps_until_tune and self.tune:
+            if self.tune_target == 'scaling':
+                self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
+            elif self.tune_target == 'lambda':
+                self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
+            # Reset counter
+            self.steps_until_tune = self.tune_interval
+            self.accepted = 0
+
+        epsilon = self.proposal_dist() * self.scaling
+
+        it = len(self._history)
+        # use the DE-MCMC-Z proposal scheme as soon as the history has 2 entries
+        if it > 1:
+            # differential evolution proposal
+            # select two other chains
+            iz1 = np.random.randint(it)
+            iz2 = np.random.randint(it)
+            while iz2 == iz1:
+                iz2 = np.random.randint(it)
+
+            z1 = self._history[iz1]
+            z2 = self._history[iz2]
+            # propose a jump
+            q = floatX(q0 + self.lamb * (z1 - z2) + epsilon)
+        else:
+            # propose just with noise in the first 2 iterations
+            q = floatX(q0 + epsilon)
+
+        accept = self.delta_logp(q, q0)
+        q_new, accepted = metrop_select(accept, q, q0)
+        self.accepted += accepted
+        self._history.append(q_new)
+
+        self.steps_until_tune -= 1
+
+        stats = {
+            'tune': self.tune,
+            'scaling': self.scaling,
+            'lambda': self.lamb,
+            'accept': np.exp(accept),
+            'accepted': accepted
+        }
+
+        return q_new, [stats]
+
+    def stop_tuning(self):
+        """At the end of the tuning phase, this method removes the first x% of the history
+        so future proposals are not informed by unconverged tuning iterations.
+        """
+        it = len(self._history)
+        n_drop = int(self.tune_drop_fraction * it)
+        self._history = self._history[n_drop:]
+        return super().stop_tuning()
+
+    @staticmethod
+    def competence(var, has_grad):
+        if var.dtype in pm.discrete_types:
+            return Competence.INCOMPATIBLE
+        return Competence.COMPATIBLE
+
+
 def sample_except(limit, excluded):
     candidate = nr.choice(limit - 1)
     if candidate >= excluded:
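To make the proposal mechanism concrete, here is a self-contained NumPy sketch of the same scheme outside of PyMC3, for a one-dimensional standard-normal target (function and variable names here are illustrative, not part of the PyMC3 API):

```python
import numpy as np

def dem_z_step(q0, history, lamb=2.38 / np.sqrt(2), scaling=0.001, rng=None):
    """One DE-MCMC-Z step for a standard-normal target (lamb = 2.38/sqrt(2*ndim), ndim=1)."""
    if rng is None:
        rng = np.random.default_rng()
    epsilon = rng.uniform(-1, 1) * scaling  # uniform noise, like UniformProposal * scaling
    if len(history) > 1:
        # jump along the difference of two distinct random past states
        iz1, iz2 = rng.choice(len(history), size=2, replace=False)
        q = q0 + lamb * (history[iz1] - history[iz2]) + epsilon
    else:
        # first two iterations: pure noise proposal
        q = q0 + epsilon
    log_accept = -0.5 * q ** 2 + 0.5 * q0 ** 2  # log p(q) - log p(q0) for N(0, 1)
    q_new = q if np.log(rng.random()) < log_accept else q0
    history.append(q_new)
    return q_new

history, q = [], 5.0  # deliberately start far from the mode
for _ in range(2000):
    q = dem_z_step(q, history)
# emulate stop_tuning with tune_drop_fraction=0.9: forget the first 90% of the
# history so future proposals ignore potentially unconverged early positions
history = history[int(0.9 * len(history)):]
```

Note the contrast with `DEMetropolis`: the difference vector `z1 - z2` is formed from the chain's own past states rather than from the current states of other chains, which is what removes the need for a large population of parallel chains.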
