Skip to content

Commit 14a7fde

Browse files
including ModelBuilder intro notebook (#565)
* including ModelBuilder intro notebook * notebook formatting * removing initial notebook, updating model_builder.ipynb * removing the mb folder, mb intro in examples/howto * updating .myst.md file
1 parent a7bf81a commit 14a7fde

File tree

2 files changed

+250
-135
lines changed

2 files changed

+250
-135
lines changed

Diff for: examples/howto/model_builder.ipynb

+141-81
Large diffs are not rendered by default.

Diff for: examples/howto/model_builder.myst.md

+109-54
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,16 @@ jupytext:
55
format_name: myst
66
format_version: 0.13
77
kernelspec:
8-
display_name: pymc-dev
8+
display_name: pymc-marketing
99
language: python
10-
name: python3
10+
name: pymc-marketing
1111
---
1212

1313
# Using ModelBuilder class for deploying PyMC models
1414
:::{post} Feb 22, 2023
1515
:tags: deployment
1616
:category: Advanced
17-
:author: Shashank Kirtania, Thomas Wiecki
17+
:author: Shashank Kirtania, Thomas Wiecki, Michał Raczycki
1818
:::
1919

2020
+++
@@ -32,6 +32,8 @@ The new `ModelBuilder` class allows users to use methods to `fit()`, `predict()`
3232
Let's go through the full workflow, starting with a simple linear regression PyMC model as it's usually written. Of course, this model is just a place-holder for your own model.
3333

3434
```{code-cell} ipython3
35+
from typing import Dict, List, Optional, Tuple, Union
36+
3537
import arviz as az
3638
import matplotlib.pyplot as plt
3739
import numpy as np
@@ -99,10 +101,11 @@ To define our desired model we inherit from the `ModelBuilder` class. There are
99101
class LinearModel(ModelBuilder):
100102
# Give the model a name
101103
_model_type = "LinearModel"
104+
102105
# And a version
103106
version = "0.1"
104107
105-
def build_model(self, model_config, data=None):
108+
def build_model(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], **kwargs):
106109
"""
107110
build_model creates the PyMC model
108111
@@ -112,76 +115,126 @@ class LinearModel(ModelBuilder):
112115
data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]]
113116
Data we want our model fit on.
114117
"""
115-
# Note that we do not have to define a with-context
116-
117-
# Create mutable data containers
118-
x_data = pm.MutableData("x_data", data["input"].values)
119-
y_data = pm.MutableData("y_data", data["output"].values)
120-
121-
# prior parameters
122-
a_mu_prior = model_config.get("a_mu_prior", 0.0)
123-
a_sigma_prior = model_config.get("a_sigma_prior", 1.0)
124-
b_mu_prior = model_config.get("b_mu_prior", 0.0)
125-
b_sigma_prior = model_config.get("b_sigma_prior", 1.0)
126-
eps_prior = model_config.get("eps_prior", 1.0)
127-
128-
# priors
129-
a = pm.Normal("a", mu=a_mu_prior, sigma=a_sigma_prior)
130-
b = pm.Normal("b", mu=b_mu_prior, sigma=b_sigma_prior)
131-
eps = pm.HalfNormal("eps", eps_prior)
132-
133-
obs = pm.Normal("y", mu=a + b * x_data, sigma=eps, shape=x_data.shape, observed=y_data)
134-
135-
def _data_setter(self, data: pd.DataFrame):
136-
"""
137-
_data_setter works as a set_data for the model and updates the data whenever we need to.
138-
Parameters:
139-
data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]]
140-
It is the data we need to update for the model.
141-
"""
118+
# Check the type of X and y and adjust access accordingly
119+
X_values = X["input"].values
120+
y_values = y.values if isinstance(y, pd.Series) else y
121+
self._generate_and_preprocess_model_data(X_values, y_values)
122+
123+
with pm.Model(coords=self.model_coords) as self.model:
124+
125+
# Create mutable data containers
126+
x_data = pm.MutableData("x_data", X_values)
127+
y_data = pm.MutableData("y_data", y_values)
128+
129+
# prior parameters
130+
a_mu_prior = self.model_config.get("a_mu_prior", 0.0)
131+
a_sigma_prior = self.model_config.get("a_sigma_prior", 1.0)
132+
b_mu_prior = self.model_config.get("b_mu_prior", 0.0)
133+
b_sigma_prior = self.model_config.get("b_sigma_prior", 1.0)
134+
eps_prior = self.model_config.get("eps_prior", 1.0)
135+
136+
# priors
137+
a = pm.Normal("a", mu=a_mu_prior, sigma=a_sigma_prior)
138+
b = pm.Normal("b", mu=b_mu_prior, sigma=b_sigma_prior)
139+
eps = pm.HalfNormal("eps", eps_prior)
140+
141+
obs = pm.Normal("y", mu=a + b * x_data, sigma=eps, shape=x_data.shape, observed=y_data)
142+
143+
def _data_setter(
144+
self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray] = None
145+
):
146+
if isinstance(X, pd.DataFrame):
147+
x_values = X["input"].values
148+
else:
149+
# Assuming "input" is the first column
150+
x_values = X[:, 0]
142151
143152
with self.model:
144-
pm.set_data({"x_data": data["input"].values})
145-
if "output" in data.columns:
146-
pm.set_data({"y_data": data["output"].values})
153+
pm.set_data({"x_data": x_values})
154+
if y is not None:
155+
pm.set_data({"y_data": y.values if isinstance(y, pd.Series) else y})
147156
148-
@classmethod
149-
def create_sample_input(cls):
157+
@property
158+
def default_model_config(self) -> Dict:
150159
"""
151-
Creates example input and parameters to test the model on.
152-
This is optional but useful.
160+
default_model_config is a property that returns a dictionary with all the prior values we want to build the model with.
161+
It supports more complex data structures like lists, dictionaries, etc.
162+
It will be passed to the class instance on initialization, in case the user doesn't provide any model_config of their own.
153163
"""
154-
155-
x = np.linspace(start=0, stop=1, num=100)
156-
y = 0.3 * x + 0.5
157-
y = y + np.random.normal(0, 1, len(x))
158-
data = pd.DataFrame({"input": x, "output": y})
159-
160-
model_config = {
164+
model_config: Dict = {
161165
"a_mu_prior": 0.0,
162166
"a_sigma_prior": 1.0,
163167
"b_mu_prior": 0.0,
164168
"b_sigma_prior": 1.0,
165169
"eps_prior": 1.0,
166170
}
171+
return model_config
167172
168-
sampler_config = {
173+
@property
174+
def default_sampler_config(self) -> Dict:
175+
"""
176+
default_sampler_config is a property that returns a dictionary with all the most important sampler parameters.
177+
It will be used in case the user doesn't provide any sampler_config of their own.
178+
"""
179+
sampler_config: Dict = {
169180
"draws": 1_000,
170181
"tune": 1_000,
171182
"chains": 3,
172183
"target_accept": 0.95,
173184
}
185+
return sampler_config
174186
175-
return data, model_config, sampler_config
176-
```
187+
@property
188+
def output_var(self):
189+
return "y"
190+
191+
@property
192+
def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
193+
"""
194+
_serializable_model_config is a property that returns a dictionary with all the model parameters that we want to save.
195+
As some of the data structures are not JSON serializable, we need to convert them to JSON-serializable objects.
196+
Some models will need them, others can just define them to return the model_config.
197+
"""
198+
return self.model_config
199+
200+
def _save_input_params(self, idata) -> None:
201+
"""
202+
Saves any additional model parameters (other than the dataset) to the idata object.
203+
204+
These parameters are stored within `idata.attrs` using keys that correspond to the parameter names.
205+
If you don't need to store any extra parameters, you can leave this method unimplemented.
177206
178-
Now we can create the `LinearModel` object.
207+
Example:
208+
For saving customer IDs provided as a 'customer_ids' input to the model:
209+
self.customer_ids = customer_ids.values #this line is done outside of the function, preferably at the initialization of the model object.
210+
idata.attrs["customer_ids"] = json.dumps(self.customer_ids.tolist()) # Convert numpy array to a JSON-serializable list.
211+
"""
212+
pass
179213
180-
But we need some example data. This is where defining a `create_sample_input()` method as done above is useful. It gives users of your model an easy way to generate data (and configurations) to test your model on.
214+
pass
215+
216+
def _generate_and_preprocess_model_data(
217+
self, X: Union[pd.DataFrame, pd.Series], y: Union[pd.Series, np.ndarray]
218+
) -> None:
219+
"""
220+
Depending on the model, we might need to preprocess the data before fitting the model.
221+
All required preprocessing and conditional assignments should be defined here.
222+
"""
223+
self.model_coords = None # in our case we're not using coords, but if we were, we would define them here, or later on in the function, if extracting them from the data.
224+
# as we don't do any data preprocessing, we just assign the data given by the user. Note that it's a very basic model,
225+
# and usually we would need to do some preprocessing, or generate the coords from the data.
226+
self.X = X
227+
self.y = y
228+
```
229+
230+
Now we can create the `LinearModel` object. The first step we need to take care of is data generation:
181231

182232
```{code-cell} ipython3
183-
data, model_config, sampler_config = LinearModel.create_sample_input()
184-
model = LinearModel(model_config, sampler_config, data)
233+
X = pd.DataFrame(data=np.linspace(start=0, stop=1, num=100), columns=["input"])
234+
y = 0.3 * x + 0.5
235+
y = y + np.random.normal(0, 1, len(x))
236+
237+
model = LinearModel()
185238
```
186239

187240
After making the object of class `LinearModel` we can fit the model using the `.fit()` method.
@@ -201,7 +254,7 @@ The `fit()` method takes one argument `data` on which we need to fit the model.
201254
* `model_config` : It stores values of the model configuration set by user for this particular model.
202255

203256
```{code-cell} ipython3
204-
idata = model.fit()
257+
idata = model.fit(X, y)
205258
```
206259

207260
## Saving model to file
@@ -254,6 +307,7 @@ Our first task is to create data on which we need to predict.
254307
```{code-cell} ipython3
255308
x_pred = np.random.uniform(low=1, high=2, size=10)
256309
prediction_data = pd.DataFrame({"input": x_pred})
310+
type(prediction_data["input"].values)
257311
```
258312

259313
`ModelBuilder` provides two methods for prediction:
@@ -274,7 +328,7 @@ posterior = az.extract(idata, num_samples=20)
274328
x_plot = xr.DataArray(np.linspace(1, 2, 100))
275329
y_plot = posterior["b"] * x_plot + posterior["a"]
276330
Line2 = ax.plot(x_plot, y_plot.transpose(), color="C1")
277-
Line1 = ax.plot(x_pred, pred_mean["y"], "x")
331+
Line1 = ax.plot(x_pred, pred_mean, "x")
278332
ax.set(title="Posterior predictive regression lines", xlabel="x", ylabel="y")
279333
ax.legend(
280334
handles=[Line1[0], Line2[0]], labels=["predicted average", "inferred regression line"], loc=0
@@ -288,6 +342,7 @@ ax.legend(
288342

289343
## Authors
290344
* Authored by Shashank Kirtania and Thomas Wiecki in 2023.
345+
* Modified and updated by Michał Raczycki in 08/2023.
291346

292347
+++
293348

0 commit comments

Comments
 (0)