pymc-devs
diff --git a/Diff for: ‎RELEASE-NOTES.md
+3 b/Diff for: ‎RELEASE-NOTES.md
+3
diff --git a/Diff for: ‎pymc3/examples/samplers_mvnormal.py
+100 b/Diff for: ‎pymc3/examples/samplers_mvnormal.py
+100
@@ -23,6 +23,9 @@
 - Fixed `sample_ppc` and `sample_ppc_w` to iterate all chains(#2633)
 - Add Bayesian R2 score (for GLMs) `stats.r2_score` (#2696) and test (#2729).
 
+### New Features
+- Michael Osthege added support for population-samplers and implemented differential evolution metropolis (`DEMetropolis`). For models with correlated dimensions that cannot use gradient-based samplers, the `DEMetropolis` sampler can give higher effective sampling rates. (see also [PR#2735](https://github.com/pymc-devs/pymc3/pull/2735))
+
 
 ## PyMC3 3.2 (October 10, 2017)
 
 
@@ -0,0 +1,100 @@
+"""
+Comparing different samplers on a correlated bivariate normal distribution.
+
+This example will sample a bivariate normal with Metropolis, Slice, NUTS and
+DEMetropolis at two correlations (0, 0.9) and print out the effective sample
+sizes, runtimes and normalized effective sampling rates.
+"""
+
+
+import numpy as np
+import time
+import pandas as pd
+import pymc3 as pm
+import theano.tensor as tt
+
+# With this flag one can switch between defining the bivariate normal as
+# either a 2D MvNormal (USE_XY = False) or splitting up the two dimensions into
+# two variables 'x' and 'y' (USE_XY = True).  The latter is recommended because
+# it highlights different behaviour with respect to blocking.
+USE_XY = True
+
+def run(steppers, p):
+    """Sample a correlated bivariate normal with each step method and time it.
+
+    The target is a zero-mean bivariate normal with unit variances and
+    off-diagonal covariance ``p``.  Depending on the module-level ``USE_XY``
+    flag it is defined either through two ``Flat`` variables 'x' and 'y' plus
+    a ``Potential`` carrying the MvNormal log-probability, or as a single 2D
+    ``MvNormal`` variable 'z'.
+
+    Parameters
+    ----------
+    steppers : iterable of step method classes
+        Each class is instantiated without arguments inside the model
+        context.  Duplicates are ignored; first-seen order is kept.
+    p : float
+        Off-diagonal entry of the covariance matrix.
+
+    Returns
+    -------
+    traces : dict
+        Result of ``pm.sample`` keyed by step class name.
+    effn : dict
+        Mean effective sample size of 'x' (USE_XY) or 'z', divided by the
+        number of draws and by the number of chains, keyed by class name.
+    runtimes : dict
+        Wall-clock seconds spent in ``pm.sample``, keyed by class name.
+    """
+    # De-duplicate while keeping the caller's order; a plain set() would make
+    # the order in which the samplers run (and print) arbitrary.
+    unique_steppers = []
+    for step_cls in steppers:
+        if step_cls not in unique_steppers:
+            unique_steppers.append(step_cls)
+
+    traces = {}
+    effn = {}
+    runtimes = {}
+
+    # The distribution parameters are the same for both model definitions.
+    mu = np.array([0., 0.])
+    cov = np.array([[1., p], [p, 1.]])
+
+    with pm.Model():
+        if USE_XY:
+            x = pm.Flat('x')
+            y = pm.Flat('y')
+            logp = pm.MvNormal.dist(mu=mu, cov=cov, shape=(2,)).logp(tt.stack([x, y]))
+            pm.Potential('logp_xy', logp)
+            start = {'x': 0, 'y': 0}
+            # Only 'x' enters the effective sample size reported below.
+            effn_var = 'x'
+        else:
+            pm.MvNormal('z', mu=mu, cov=cov, shape=(2,))
+            start = {'z': [0, 0]}
+            effn_var = 'z'
+
+        for step_cls in unique_steppers:
+            name = step_cls.__name__
+            t_start = time.time()
+            mt = pm.sample(
+                draws=10000,
+                chains=16, parallelize=False,
+                step=step_cls(),
+                start=start
+            )
+            runtimes[name] = time.time() - t_start
+            print('{} samples across {} chains'.format(len(mt) * mt.nchains, mt.nchains))
+            traces[name] = mt
+            en = pm.diagnostics.effective_n(mt)
+            print('effective: {}\r\n'.format(en))
+            # Normalize by the total number of draws so the value is a rate.
+            effn[name] = np.mean(en[effn_var]) / len(mt) / mt.nchains
+    return traces, effn, runtimes
+
+
+if __name__ == '__main__':
+    # Step method classes to benchmark; their class names label the columns.
+    methods = [
+        pm.Metropolis,
+        pm.Slice,
+        pm.NUTS,
+        pm.DEMetropolis
+    ]
+    names = [c.__name__ for c in methods]
+
+    # One row per correlation value, one column per step method.
+    df_base = pd.DataFrame(columns=['p'] + names)
+    df_base['p'] = [.0,.9]
+    df_base = df_base.set_index('p')
+
+    df_effectiven = df_base.copy()
+    df_runtime = df_base.copy()
+    df_performance = df_base.copy()
+
+    for p in df_effectiven.index:
+        _, rate, runtime = run(methods, p)
+        for name in names:
+            # DataFrame.set_value was deprecated in pandas 0.21 and removed in
+            # 1.0; .at is the supported scalar setter.
+            df_effectiven.at[p, name] = rate[name]
+            df_runtime.at[p, name] = runtime[name]
+            df_performance.at[p, name] = rate[name] / runtime[name]
+
+    print('\r\nEffective sample size [0...1]')
+    print(df_effectiven.T.to_string(float_format='{:.3f}'.format))
+
+    print('\r\nRuntime [s]')
+    print(df_runtime.T.to_string(float_format='{:.1f}'.format))
+
+    if 'NUTS' in names:
+        # Express every rate relative to NUTS on the uncorrelated target (p=0).
+        print('\r\nNormalized effective sampling rate [0...1]')
+        df_performance = df_performance.T / df_performance.loc[0]['NUTS']
+    else:
+        print('\r\nNormalized effective sampling rate [1/s]')
+        df_performance = df_performance.T
+    print(df_performance.to_string(float_format='{:.3f}'.format))