
Commit bb574a7

Add DEMetropolisZ stepper (#3784)
* implement DEMetropolisZ
* use len instead of attribute
* introduce tune_drop_fraction, which improves efficiency in situations where initial tuning convergence is slow (see PR #3784)
* reset tuned settings in sequential single-process sampling
* tune DEMetropolisZ lambda by default
* use new empty list to reset history
* fix dual-use conflict of self.tune attribute
* add warning and initialize self.tune attribute
* add tests for DEMetropolisZ
* add tests to hit more of the diff
* add and move notebooks, update release notes
* address review feedback
* apply review suggestions
* more accurate wording

Co-authored-by: Thomas Wiecki <[email protected]>
1 parent: dfc9102 · commit: bb574a7

10 files changed: +5,063 -4 lines

Diff for: RELEASE-NOTES.md (+2)

@@ -5,6 +5,8 @@
 ### New features
 - use [fastprogress](https://github.com/fastai/fastprogress) instead of tqdm [#3693](https://github.com/pymc-devs/pymc3/pull/3693)
 - `DEMetropolis` can now tune both `lambda` and `scaling` parameters, but by default neither of them is tuned. See [#3743](https://github.com/pymc-devs/pymc3/pull/3743) for more info.
+- `DEMetropolisZ`, an improved variant of `DEMetropolis`, brings better parallelization and higher efficiency with fewer chains, at the cost of slower initial convergence. This implementation is experimental. See [#3784](https://github.com/pymc-devs/pymc3/pull/3784) for more info.
+- Notebooks that give insight into `DEMetropolis`, `DEMetropolisZ` and the `DifferentialEquation` interface are now located in the [Tutorials/Deep Dive](https://docs.pymc.io/nb_tutorials/index.html) section.

 ### Maintenance
 - Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6.
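
For orientation, the new stepper can be requested like any other PyMC3 step method. Below is a minimal usage sketch; the toy model is made up for illustration, while `DEMetropolisZ` and its `tune`/`tune_drop_fraction` keywords come from this commit:

```python
import pymc3 as pm

with pm.Model():
    x = pm.Normal('x', mu=0, sd=1, shape=10)
    # DEMetropolisZ tunes `lambda` by default; tune_drop_fraction controls
    # how much of the tuning history is discarded when tuning ends
    step = pm.DEMetropolisZ(tune='lambda', tune_drop_fraction=0.9)
    trace = pm.sample(tune=2000, draws=2000, step=step, chains=4, cores=4)
```

Because proposals are generated from each chain's own history rather than from the current states of parallel chains, the chains never need to communicate, which is what enables the better parallelization mentioned above.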

Diff for: docs/source/notebooks/DEMetropolisZ_EfficiencyComparison.ipynb (+2,459)

Large diffs are not rendered by default.

Diff for: docs/source/notebooks/DEMetropolisZ_tune_drop_fraction.ipynb (+2,284)

Large diffs are not rendered by default.

Diff for: docs/source/notebooks/table_of_contents_examples.js (-1)

@@ -56,6 +56,5 @@ Gallery.contents = {
     "GLM-hierarchical-advi-minibatch": "Variational Inference",
     "ODE_with_manual_gradients": "Inference in ODE models",
     "ODE_API_introduction": "Inference in ODE models",
-    "ODE_API_shapes_and_benchmarking": "Inference in ODE models",
     "probabilistic_matrix_factorization": "Case Studies"
 }

Diff for: docs/source/notebooks/table_of_contents_tutorials.js (+3)

@@ -10,6 +10,9 @@ Gallery.contents = {
     "Diagnosing_biased_Inference_with_Divergences": "Deep dives",
     "Advanced_usage_of_Theano_in_PyMC3.rst": "Deep dives",
     "getting_started": "Deep dives",
+    "ODE_API_shapes_and_benchmarking": "Deep dives",
+    "DEMetropolisZ_EfficiencyComparison": "Deep dives",
+    "DEMetropolisZ_tune_drop_fraction": "Deep dives",
     "PyMC3_tips_and_heuristic": "How-To",
     "blackbox_external_likelihood": "How-To",
     "profiling": "How-To",

Diff for: pymc3/sampling.py (+2)

@@ -875,6 +875,8 @@ def _iter_sample(

     try:
         step.tune = bool(tune)
+        if hasattr(step, 'reset_tuning'):
+            step.reset_tuning()
         for i in range(draws):
             stats = None

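As an aside, the hook added here is duck-typed: `_iter_sample` only calls `reset_tuning` when the step method provides one, so existing steppers without resettable state are unaffected. A minimal sketch of the pattern (the `LegacyStep` class and `start_chain` helper are hypothetical, for illustration only):

```python
class LegacyStep:
    """A stepper with no resettable tuning state."""
    tune = True

def start_chain(step, tune):
    # mirrors the two lines added to _iter_sample above
    step.tune = bool(tune)
    if hasattr(step, 'reset_tuning'):
        step.reset_tuning()  # skipped for LegacyStep

start_chain(LegacyStep(), tune=500)  # runs without error
```

This matters for sequential sampling: when several chains are drawn one after another with the same step instance, tuned parameters and the proposal history of one chain would otherwise leak into the next.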

Diff for: pymc3/step_methods/__init__.py (+1 -1)

@@ -17,7 +17,7 @@
 from .hmc import HamiltonianMC, NUTS

 from .metropolis import Metropolis
-from .metropolis import DEMetropolis
+from .metropolis import DEMetropolis, DEMetropolisZ
 from .metropolis import BinaryMetropolis
 from .metropolis import BinaryGibbsMetropolis
 from .metropolis import CategoricalGibbsMetropolis

Diff for: pymc3/step_methods/compound.py (+5)

@@ -67,6 +67,11 @@ def stop_tuning(self):
         for method in self.methods:
             method.stop_tuning()

+    def reset_tuning(self):
+        for method in self.methods:
+            if hasattr(method, 'reset_tuning'):
+                method.reset_tuning()
+
     @property
     def vars_shape_dtype(self):
         dtype_shapes = {}
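For context, a `CompoundStep` arises when different step methods handle different variables; the new method simply forwards the reset to every sub-stepper that supports it. A hedged sketch of how such a compound step comes about (toy model, illustrative only):

```python
import pymc3 as pm

with pm.Model():
    x = pm.Normal('x', mu=0, sd=1)
    c = pm.Bernoulli('c', p=0.5)
    # two steppers -> pymc3 wraps them in a CompoundStep; reset_tuning()
    # on the compound reaches the DEMetropolisZ but skips BinaryMetropolis
    steps = [pm.DEMetropolisZ([x]), pm.BinaryMetropolis([c])]
    trace = pm.sample(step=steps, chains=2, cores=1)
```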

Diff for: pymc3/step_methods/metropolis.py (+177 -2)

@@ -16,13 +16,14 @@
 import numpy.random as nr
 import theano
 import scipy.linalg
+import warnings

 from ..distributions import draw_values
 from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence
 import pymc3 as pm
 from pymc3.theanof import floatX

-__all__ = ['Metropolis', 'DEMetropolis', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
+__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
            'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal',
            'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal']

@@ -572,9 +573,10 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0

         self.scaling = np.atleast_1d(scaling).astype('d')
         if lamb is None:
+            # default to the optimal lambda for normally distributed targets
             lamb = 2.38 / np.sqrt(2 * model.ndim)
         self.lamb = float(lamb)
-        if not tune in {None, 'scaling', 'lambda'}:
+        if tune not in {None, 'scaling', 'lambda'}:
             raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
         self.tune = tune
         self.tune_interval = tune_interval

@@ -630,6 +632,179 @@ def competence(var, has_grad):
         return Competence.COMPATIBLE


+class DEMetropolisZ(ArrayStepShared):
+    """
+    Adaptive Differential Evolution Metropolis sampling step that uses the past to inform jumps.
+
+    Parameters
+    ----------
+    lamb : float
+        Lambda parameter of the DE proposal mechanism. Defaults to 2.38 / sqrt(2 * ndim)
+    vars : list
+        List of variables for sampler
+    S : standard deviation or covariance matrix
+        Some measure of variance to parameterize proposal distribution
+    proposal_dist : function
+        Function that returns zero-mean deviates when parameterized with
+        S (and n). Defaults to Uniform(-S,+S).
+    scaling : scalar or array
+        Initial scale factor for epsilon. Defaults to 0.001
+    tune : str
+        Which hyperparameter to tune. Defaults to 'lambda', but can also be 'scaling' or None.
+    tune_interval : int
+        The frequency of tuning. Defaults to 100 iterations.
+    tune_drop_fraction : float
+        Fraction of tuning steps that will be removed from the samplers history when the tuning ends.
+        Defaults to 0.9 - keeping the last 10% of tuning steps for good mixing while removing 90% of
+        potentially unconverged tuning positions.
+    model : PyMC Model
+        Optional model for sampling step. Defaults to None (taken from context).
+    mode : string or `Mode` instance.
+        compilation mode passed to Theano functions
+
+    References
+    ----------
+    .. [Braak2006] Cajo C.F. ter Braak (2006).
+        Differential Evolution Markov Chain with snooker updater and fewer chains.
+        Statistics and Computing
+        `link <https://doi.org/10.1007/s11222-008-9104-9>`__
+    """
+    name = 'DEMetropolisZ'
+
+    default_blocked = True
+    generates_stats = True
+    stats_dtypes = [{
+        'accept': np.float64,
+        'accepted': np.bool,
+        'tune': np.bool,
+        'scaling': np.float64,
+        'lambda': np.float64,
+    }]
+
+    def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
+                 tune='lambda', tune_interval=100, tune_drop_fraction: float = 0.9, model=None, mode=None, **kwargs):
+
+        warnings.warn(
+            'The DEMetropolisZ implementation in PyMC3 is very young. You should be extra critical about its results.'
+            ' See Pull Request #3784 for more information.'
+        )
+
+        model = pm.modelcontext(model)
+
+        if vars is None:
+            vars = model.cont_vars
+        vars = pm.inputvars(vars)
+
+        if S is None:
+            S = np.ones(model.ndim)
+
+        if proposal_dist is not None:
+            self.proposal_dist = proposal_dist(S)
+        else:
+            self.proposal_dist = UniformProposal(S)
+
+        self.scaling = np.atleast_1d(scaling).astype('d')
+        if lamb is None:
+            # default to the optimal lambda for normally distributed targets
+            lamb = 2.38 / np.sqrt(2 * model.ndim)
+        self.lamb = float(lamb)
+        if tune not in {None, 'scaling', 'lambda'}:
+            raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
+        self.tune = True
+        self.tune_target = tune
+        self.tune_interval = tune_interval
+        self.tune_drop_fraction = tune_drop_fraction
+        self.steps_until_tune = tune_interval
+        self.accepted = 0
+
+        # cache local history for the Z-proposals
+        self._history = []
+        # remember initial settings before tuning so they can be reset
+        self._untuned_settings = dict(
+            scaling=self.scaling,
+            lamb=self.lamb,
+            steps_until_tune=tune_interval,
+            accepted=self.accepted
+        )
+
+        self.mode = mode
+
+        shared = pm.make_shared_replacements(vars, model)
+        self.delta_logp = delta_logp(model.logpt, vars, shared)
+        super().__init__(vars, shared)
+
+    def reset_tuning(self):
+        """Resets the tuned sampler parameters and history to their initial values."""
+        # history can't be reset via the _untuned_settings dict because it's a list
+        self._history = []
+        for attr, initial_value in self._untuned_settings.items():
+            setattr(self, attr, initial_value)
+        return
+
+    def astep(self, q0):
+        # same tuning scheme as DEMetropolis
+        if not self.steps_until_tune and self.tune:
+            if self.tune_target == 'scaling':
+                self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
+            elif self.tune_target == 'lambda':
+                self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
+            # Reset counter
+            self.steps_until_tune = self.tune_interval
+            self.accepted = 0
+
+        epsilon = self.proposal_dist() * self.scaling
+
+        it = len(self._history)
+        # use the DE-MCMC-Z proposal scheme as soon as the history has 2 entries
+        if it > 1:
+            # differential evolution proposal
+            # select two other chains
+            iz1 = np.random.randint(it)
+            iz2 = np.random.randint(it)
+            while iz2 == iz1:
+                iz2 = np.random.randint(it)
+
+            z1 = self._history[iz1]
+            z2 = self._history[iz2]
+            # propose a jump
+            q = floatX(q0 + self.lamb * (z1 - z2) + epsilon)
+        else:
+            # propose just with noise in the first 2 iterations
+            q = floatX(q0 + epsilon)
+
+        accept = self.delta_logp(q, q0)
+        q_new, accepted = metrop_select(accept, q, q0)
+        self.accepted += accepted
+        self._history.append(q_new)
+
+        self.steps_until_tune -= 1
+
+        stats = {
+            'tune': self.tune,
+            'scaling': self.scaling,
+            'lambda': self.lamb,
+            'accept': np.exp(accept),
+            'accepted': accepted
+        }
+
+        return q_new, [stats]
+
+    def stop_tuning(self):
+        """At the end of the tuning phase, this method removes the first x% of the history
+        so future proposals are not informed by unconverged tuning iterations.
+        """
+        it = len(self._history)
+        n_drop = int(self.tune_drop_fraction * it)
+        self._history = self._history[n_drop:]
+        return super().stop_tuning()
+
+    @staticmethod
+    def competence(var, has_grad):
+        if var.dtype in pm.discrete_types:
+            return Competence.INCOMPATIBLE
+        return Competence.COMPATIBLE
+
+
 def sample_except(limit, excluded):
     candidate = nr.choice(limit - 1)
     if candidate >= excluded:
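To make the proposal mechanism concrete, here is a self-contained NumPy sketch of the same scheme outside of PyMC3, for a one-dimensional standard-normal target (function and variable names here are illustrative, not part of the PyMC3 API):

```python
import numpy as np

def dem_z_step(q0, history, lamb=2.38 / np.sqrt(2), scaling=0.001, rng=None):
    """One DE-MCMC-Z step for a standard-normal target (lamb = 2.38/sqrt(2*ndim), ndim=1)."""
    if rng is None:
        rng = np.random.default_rng()
    epsilon = rng.uniform(-1, 1) * scaling  # uniform noise, like UniformProposal * scaling
    if len(history) > 1:
        # jump along the difference of two distinct random past states
        iz1, iz2 = rng.choice(len(history), size=2, replace=False)
        q = q0 + lamb * (history[iz1] - history[iz2]) + epsilon
    else:
        # first two iterations: pure noise proposal
        q = q0 + epsilon
    log_accept = -0.5 * q ** 2 + 0.5 * q0 ** 2  # log p(q) - log p(q0) for N(0, 1)
    q_new = q if np.log(rng.random()) < log_accept else q0
    history.append(q_new)
    return q_new

history, q = [], 5.0  # deliberately start far from the mode
for _ in range(2000):
    q = dem_z_step(q, history)
# emulate stop_tuning with tune_drop_fraction=0.9: forget the first 90% of the
# history so future proposals ignore potentially unconverged early positions
history = history[int(0.9 * len(history)):]
```

Note the contrast with `DEMetropolis`: the difference vector `z1 - z2` is formed from the chain's own past states rather than from the current states of other chains, which is what removes the need for a large population of parallel chains.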
