Skip to content

[WIP] Proposed revision to sample_posterior_predictive() #3468

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,15 @@

- `nuts_kwargs` and `step_kwargs` have been deprecated in favor of using the standard `kwargs` to pass optional step method arguments.
- `SGFS` and `CSG` have been removed (Fix for [#3353](https://github.com/pymc-devs/pymc3/issues/3353)). They have been moved to [pymc3-experimental](https://github.com/pymc-devs/pymc3-experimental).
- References to `live_plot` and corresponding notebooks have been removed.
- Function `approx_hessian` was removed, due to `numdifftools` becoming incompatible with current `scipy`. The function was already optional, only available to a user who installed `numdifftools` separately, and not hit on any common codepaths. [#3485](https://github.com/pymc-devs/pymc3/pull/3485).
- Deprecated `vars` parameters of `sample_posterior_predictive` and `sample_prior_predictive` in favor of `var_names`. At least for the latter, this is more accurate, since the `vars` parameter actually took names.

## PyMC3 3.6 (Dec 21 2018)

Expand Down
3 changes: 2 additions & 1 deletion pymc3/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import itertools
import threading
import warnings
from typing import Optional

import numpy as np
from pandas import Series
Expand Down Expand Up @@ -187,7 +188,7 @@ def get_context(cls):
raise TypeError("No context on context stack")


def modelcontext(model):
def modelcontext(model: Optional['Model']) -> 'Model':
"""return the given model or try to find it in the context if there was
none supplied.
"""
Expand Down
63 changes: 52 additions & 11 deletions pymc3/sampling.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from typing import Dict, List, Optional, TYPE_CHECKING, cast
if TYPE_CHECKING:
from typing import Any
from typing import Iterable as TIterable
from collections import defaultdict, Iterable
from copy import copy
import pickle
Expand All @@ -6,11 +10,12 @@

import numpy as np
import theano.gradient as tg
from theano.tensor import Tensor

from .backends.base import BaseTrace, MultiTrace
from .backends.ndarray import NDArray
from .distributions.distribution import draw_values
from .model import modelcontext, Point, all_continuous
from .model import modelcontext, Point, all_continuous, Model
from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
BinaryGibbsMetropolis, CategoricalGibbsMetropolis,
Slice, CompoundStep, arraystep, smc)
Expand Down Expand Up @@ -529,7 +534,6 @@ def _sample_population(draws, chain, chains, start, random_seed, step, tune,
def _sample(chain, progressbar, random_seed, start, draws=None, step=None,
trace=None, tune=None, model=None, **kwargs):
skip_first = kwargs.get('skip_first', 0)
refresh_every = kwargs.get('refresh_every', 100)

sampling = _iter_sample(draws, step, start, trace, chain,
tune, model, random_seed)
Expand Down Expand Up @@ -1027,8 +1031,14 @@ def stop_tuning(step):
return step


def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size=None,
random_seed=None, progressbar=True):
def sample_posterior_predictive(trace,
samples: Optional[int]=None,
model: Optional[Model]=None,
vars: Optional[TIterable[Tensor]]=None,
var_names: Optional[List[str]]=None,
size: Optional[int]=None,
random_seed=None,
progressbar: bool=True) -> Dict[str, np.ndarray]:
"""Generate posterior predictive samples from a model given a trace.

Parameters
Expand All @@ -1042,7 +1052,10 @@ def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size
Model used to generate `trace`
vars : iterable
Variables for which to compute the posterior predictive samples.
Defaults to `model.observed_RVs`.
Defaults to `model.observed_RVs`. Deprecated: please use `var_names` instead.
var_names : Iterable[str]
Alternative way to specify the variables to sample, by name; provided for
consistency with other sampling functions.
size : int
The number of random draws from the distribution specified by the parameters in each
sample of the trace.
Expand All @@ -1056,7 +1069,7 @@ def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size
Returns
-------
samples : dict
Dictionary with the variables as keys. The values corresponding to the
Dictionary with the variable names as keys, and values numpy arrays containing
posterior predictive samples.
"""
len_trace = len(trace)
Expand All @@ -1070,6 +1083,14 @@ def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size

model = modelcontext(model)

if var_names is not None:
if vars is not None:
raise ValueError("Should not specify both vars and var_names arguments.")
else:
vars = [model[x] for x in var_names]
elif vars is not None: # var_names is None, and vars is not.
warnings.warn("vars argument is deprecated in favor of var_names.",
DeprecationWarning)
if vars is None:
vars = model.observed_RVs

Expand All @@ -1081,7 +1102,7 @@ def sample_posterior_predictive(trace, samples=None, model=None, vars=None, size
if progressbar:
indices = tqdm(indices, total=samples)

ppc_trace = defaultdict(list)
ppc_trace = defaultdict(list) # type: Dict[str, List[Any]]
try:
for idx in indices:
if nchain > 1:
Expand Down Expand Up @@ -1250,18 +1271,28 @@ def sample_ppc_w(*args, **kwargs):
return sample_posterior_predictive_w(*args, **kwargs)


def sample_prior_predictive(samples=500, model=None, vars=None, random_seed=None):
def sample_prior_predictive(samples=500,
model: Optional[Model]=None,
vars: Optional[TIterable[str]] = None,
var_names: Optional[TIterable[str]] = None,
random_seed=None) -> Dict[str, np.ndarray]:
"""Generate samples from the prior predictive distribution.

Parameters
----------
samples : int
Number of samples from the prior predictive to generate. Defaults to 500.
model : Model (optional if in `with` context)
vars : iterable
vars : Iterable[str]
A list of names of variables for which to compute the prior predictive
samples.
Defaults to `model.named_vars`.
DEPRECATED - Use `var_names` instead.
var_names : Iterable[str]
A list of names of variables for which to compute the prior predictive
samples.
Defaults to `model.named_vars`.

random_seed : int
Seed for the random number generator.

Expand All @@ -1273,8 +1304,16 @@ def sample_prior_predictive(samples=500, model=None, vars=None, random_seed=None
"""
model = modelcontext(model)

if vars is None:
if vars is None and var_names is None:
vars = set(model.named_vars.keys())
elif vars is None:
vars = var_names
elif vars is not None:
warnings.warn("vars argument is deprecated in favor of var_names.",
DeprecationWarning)
else:
raise ValueError("Cannot supply both vars and var_names arguments.")
vars = cast(TIterable[str], vars) # tell mypy that vars cannot be None here.

if random_seed is not None:
np.random.seed(random_seed)
Expand All @@ -1283,8 +1322,10 @@ def sample_prior_predictive(samples=500, model=None, vars=None, random_seed=None
values = draw_values([model[name] for name in names], size=samples)

data = {k: v for k, v in zip(names, values)}
if data is None:
raise AssertionError("No variables sampled: attempting to sample %s"%names)

prior = {}
prior = {} # type: Dict[str, np.ndarray]
for var_name in vars:
if var_name in data:
prior[var_name] = data[var_name]
Expand Down
39 changes: 36 additions & 3 deletions pymc3/tests/test_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,36 @@ def test_model_shared_variable(self):
assert np.allclose(post_pred["p"], expected_p)

def test_deterministic_of_observed(self):
    """Posterior predictive sampling must evaluate Deterministic nodes that
    depend on observed RVs, and the deprecated ``vars`` keyword of
    ``sample_posterior_predictive`` must still work (emitting a warning).
    """
    # Two independent synthetic measurement sets (float32/float64 per theano config).
    meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(100))
    meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(100))
    with pm.Model() as model:
        mu_in_1 = pm.Normal("mu_in_1", 0, 1)
        sigma_in_1 = pm.HalfNormal("sd_in_1", 1)
        mu_in_2 = pm.Normal("mu_in_2", 0, 1)
        sigma_in_2 = pm.HalfNormal("sd__in_2", 1)

        in_1 = pm.Normal("in_1", mu_in_1, sigma_in_1, observed=meas_in_1)
        in_2 = pm.Normal("in_2", mu_in_2, sigma_in_2, observed=meas_in_2)
        out_diff = in_1 + in_2
        pm.Deterministic("out", out_diff)

        trace = pm.sample(100)
        # Rebuild trace points without the deterministic "out", so the
        # posterior predictive step is forced to recompute it.
        ppc_trace = pm.trace_to_dataframe(
            trace, varnames=[n for n in trace.varnames if n != "out"]
        ).to_dict("records")
        # Passing `vars` (instead of `var_names`) is deprecated and must warn.
        with pytest.warns(DeprecationWarning):
            ppc = pm.sample_posterior_predictive(
                model=model,
                trace=ppc_trace,
                samples=len(ppc_trace),
                vars=(model.deterministics + model.basic_RVs)
            )

        # Looser tolerance for single precision.
        rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3
        # "out" was defined as in_1 + in_2, so sampled values must agree.
        assert np.allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol)


def test_deterministic_of_observed_modified_interface(self):
meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(100))
meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(100))
with pm.Model() as model:
Expand All @@ -380,7 +410,7 @@ def test_deterministic_of_observed(self):
model=model,
trace=ppc_trace,
samples=len(ppc_trace),
vars=(model.deterministics + model.basic_RVs),
var_names=[x.name for x in (model.deterministics + model.basic_RVs)],
)

rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3
Expand Down Expand Up @@ -466,10 +496,13 @@ def test_respects_shape(self):
with pm.Model():
mu = pm.Gamma("mu", 3, 1, shape=1)
goals = pm.Poisson("goals", mu, shape=shape)
trace = pm.sample_prior_predictive(10)
with pytest.warns(DeprecationWarning):
trace1 = pm.sample_prior_predictive(10, vars=['mu', 'goals'])
trace2 = pm.sample_prior_predictive(10, var_names=['mu', 'goals'])
if shape == 2: # want to test shape as an int
shape = (2,)
assert trace["goals"].shape == (10,) + shape
assert trace1["goals"].shape == (10,) + shape
assert trace2["goals"].shape == (10,) + shape

def test_multivariate(self):
with pm.Model():
Expand Down