 import numpy.random as nr
 import theano
 import scipy.linalg
+import warnings
 
 from ..distributions import draw_values
 from .arraystep import ArrayStepShared, PopulationArrayStepShared, ArrayStep, metrop_select, Competence
 import pymc3 as pm
 from pymc3.theanof import floatX
 
-__all__ = ['Metropolis', 'DEMetropolis', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
+__all__ = ['Metropolis', 'DEMetropolis', 'DEMetropolisZ', 'BinaryMetropolis', 'BinaryGibbsMetropolis',
            'CategoricalGibbsMetropolis', 'NormalProposal', 'CauchyProposal',
            'LaplaceProposal', 'PoissonProposal', 'MultivariateNormalProposal']
 
@@ -572,9 +573,10 @@ def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.0
 
         self.scaling = np.atleast_1d(scaling).astype('d')
         if lamb is None:
+            # default to the optimal lambda for normally distributed targets
             lamb = 2.38 / np.sqrt(2 * model.ndim)
         self.lamb = float(lamb)
-        if not tune in {None, 'scaling', 'lambda'}:
+        if tune not in {None, 'scaling', 'lambda'}:
             raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
         self.tune = tune
         self.tune_interval = tune_interval
@@ -630,6 +632,179 @@ def competence(var, has_grad):
         return Competence.COMPATIBLE
 
 
+class DEMetropolisZ(ArrayStepShared):
+    """
+    Adaptive Differential Evolution Metropolis sampling step that uses the past to inform jumps.
+
+    Parameters
+    ----------
+    lamb : float
+        Lambda parameter of the DE proposal mechanism. Defaults to 2.38 / sqrt(2 * ndim)
+    vars : list
+        List of variables for sampler
+    S : standard deviation or covariance matrix
+        Some measure of variance to parameterize proposal distribution
+    proposal_dist : function
+        Function that returns zero-mean deviates when parameterized with
+        S (and n). Defaults to Uniform(-S,+S).
+    scaling : scalar or array
+        Initial scale factor for epsilon. Defaults to 0.001
+    tune : str
+        Which hyperparameter to tune. Defaults to 'lambda', but can also be 'scaling' or None.
+    tune_interval : int
+        The frequency of tuning. Defaults to 100 iterations.
+    tune_drop_fraction : float
+        Fraction of tuning steps that will be removed from the sampler's history when the tuning ends.
+        Defaults to 0.9, keeping the last 10% of tuning steps for good mixing while removing 90% of
+        potentially unconverged tuning positions.
+    model : PyMC Model
+        Optional model for sampling step. Defaults to None (taken from context).
+    mode : string or `Mode` instance.
+        Compilation mode passed to Theano functions
+
+    References
+    ----------
+    .. [terBraakVrugt2008] Cajo C.F. ter Braak & Jasper A. Vrugt (2008).
+        Differential Evolution Markov Chain with snooker updater and fewer chains.
+        Statistics and Computing
+        `link <https://doi.org/10.1007/s11222-008-9104-9>`__
+    """
+    name = 'DEMetropolisZ'
+
+    default_blocked = True
+    generates_stats = True
+    stats_dtypes = [{
+        'accept': np.float64,
+        'accepted': np.bool,
+        'tune': np.bool,
+        'scaling': np.float64,
+        'lambda': np.float64,
+    }]
+
+    def __init__(self, vars=None, S=None, proposal_dist=None, lamb=None, scaling=0.001,
+                 tune='lambda', tune_interval=100, tune_drop_fraction: float = 0.9, model=None, mode=None, **kwargs):
+
+        warnings.warn(
+            'The DEMetropolisZ implementation in PyMC3 is very young. You should be extra critical about its results.'
+            ' See Pull Request #3784 for more information.'
+        )
+
+        model = pm.modelcontext(model)
+
+        if vars is None:
+            vars = model.cont_vars
+        vars = pm.inputvars(vars)
+
+        if S is None:
+            S = np.ones(model.ndim)
+
+        if proposal_dist is not None:
+            self.proposal_dist = proposal_dist(S)
+        else:
+            self.proposal_dist = UniformProposal(S)
+
+        self.scaling = np.atleast_1d(scaling).astype('d')
+        if lamb is None:
+            # default to the optimal lambda for normally distributed targets
+            lamb = 2.38 / np.sqrt(2 * model.ndim)
+        self.lamb = float(lamb)
+        if tune not in {None, 'scaling', 'lambda'}:
+            raise ValueError('The parameter "tune" must be one of {None, scaling, lambda}')
+        self.tune = True
+        self.tune_target = tune
+        self.tune_interval = tune_interval
+        self.tune_drop_fraction = tune_drop_fraction
+        self.steps_until_tune = tune_interval
+        self.accepted = 0
+
+        # cache local history for the Z-proposals
+        self._history = []
+        # remember initial settings before tuning so they can be reset
+        self._untuned_settings = dict(
+            scaling=self.scaling,
+            lamb=self.lamb,
+            steps_until_tune=tune_interval,
+            accepted=self.accepted
+        )
+
+        self.mode = mode
+
+        shared = pm.make_shared_replacements(vars, model)
+        self.delta_logp = delta_logp(model.logpt, vars, shared)
+        super().__init__(vars, shared)
+
+    def reset_tuning(self):
+        """Resets the tuned sampler parameters and history to their initial values."""
+        # history can't be reset via the _untuned_settings dict because it's a list
+        self._history = []
+        for attr, initial_value in self._untuned_settings.items():
+            setattr(self, attr, initial_value)
+        return
+
+    def astep(self, q0):
+        # same tuning scheme as DEMetropolis
+        if not self.steps_until_tune and self.tune:
+            if self.tune_target == 'scaling':
+                self.scaling = tune(self.scaling, self.accepted / float(self.tune_interval))
+            elif self.tune_target == 'lambda':
+                self.lamb = tune(self.lamb, self.accepted / float(self.tune_interval))
+            # Reset counter
+            self.steps_until_tune = self.tune_interval
+            self.accepted = 0
+
+        epsilon = self.proposal_dist() * self.scaling
+
+        it = len(self._history)
+        # use the DE-MCMC-Z proposal scheme as soon as the history has 2 entries
+        if it > 1:
+            # differential evolution proposal
+            # pick two distinct random points from the chain's own history
+            iz1 = np.random.randint(it)
+            iz2 = np.random.randint(it)
+            while iz2 == iz1:
+                iz2 = np.random.randint(it)
+
+            z1 = self._history[iz1]
+            z2 = self._history[iz2]
+            # propose a jump
+            q = floatX(q0 + self.lamb * (z1 - z2) + epsilon)
+        else:
+            # propose just with noise in the first 2 iterations
+            q = floatX(q0 + epsilon)
+
+        accept = self.delta_logp(q, q0)
+        q_new, accepted = metrop_select(accept, q, q0)
+        self.accepted += accepted
+        self._history.append(q_new)
+
+        self.steps_until_tune -= 1
+
+        stats = {
+            'tune': self.tune,
+            'scaling': self.scaling,
+            'lambda': self.lamb,
+            'accept': np.exp(accept),
+            'accepted': accepted
+        }
+
+        return q_new, [stats]
+
+    def stop_tuning(self):
+        """At the end of the tuning phase, this method drops the first `tune_drop_fraction` of the
+        history so that future proposals are not informed by unconverged tuning iterations.
+        """
+        it = len(self._history)
+        n_drop = int(self.tune_drop_fraction * it)
+        self._history = self._history[n_drop:]
+        return super().stop_tuning()
+
+    @staticmethod
+    def competence(var, has_grad):
+        if var.dtype in pm.discrete_types:
+            return Competence.INCOMPATIBLE
+        return Competence.COMPATIBLE
+
+
 def sample_except(limit, excluded):
     candidate = nr.choice(limit - 1)
     if candidate >= excluded:
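
For orientation, the step method added above plugs into `pm.sample` like any other PyMC3 step: construct it inside a model context and pass it via the `step` argument. A minimal usage sketch, not part of the changeset itself, assuming the class is re-exported at the package level as `pm.DEMetropolisZ` (as the `__all__` change suggests):

    import pymc3 as pm

    with pm.Model():
        # toy target: five independent standard normals
        x = pm.Normal('x', mu=0.0, sigma=1.0, shape=5)
        # tune lambda (the default); 90% of the tuning history is dropped when tuning ends
        step = pm.DEMetropolisZ(tune='lambda', tune_interval=100, tune_drop_fraction=0.9)
        trace = pm.sample(draws=2000, tune=1000, step=step, chains=4, cores=1)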