Skip to content

Commit acb8da0

Browse files
authored
Increase unittest check_logcdf coverage and fix issues with some distribution methods (#4393)
* Change `check_logcdf` to test that values below or above the domain are properly evaluated. Fix issues with the `Uniform`, `HalfNormal`, `Gamma`, and `InverseGamma` distributions.
* Add a multiple-value test for `logcdf`. Add a more informative comment for the `Gamma` and `InverseGamma` hack. Update the release notes.
* Update the release notes
* Update docstrings with valid value types
* Update RELEASE-NOTES.md
* Add more informative comments and remove TODO
* TypeError: format -> f-strings
* Ignore finite upper limit in Nat domains. Move new checks to `check_logcdf`.
* Use `tt.switch` in `DiscreteUniform` for hard boundary (addresses a previously failing test on 32-bit OS)
1 parent a21fafa commit acb8da0

File tree

4 files changed

+133
-71
lines changed

4 files changed

+133
-71
lines changed

Diff for: RELEASE-NOTES.md

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang
2929
- Fixed mathematical formulation in `MvStudentT` random method. (see [#4359](https://github.com/pymc-devs/pymc3/pull/4359))
3030
- Fix issue in `logp` method of `HyperGeometric`. It now returns `-inf` for invalid parameters (see [4367](https://github.com/pymc-devs/pymc3/pull/4367))
3131
- Fixed `MatrixNormal` random method to work with parameters as random variables. (see [#4368](https://github.com/pymc-devs/pymc3/pull/4368))
32+
- Update the `logcdf` method of several continuous distributions to return `-inf` for invalid parameters and values, and to raise an informative error when multiple values cannot be evaluated in a single call. (see [#4393](https://github.com/pymc-devs/pymc3/pull/4393))
3233

3334
## PyMC3 3.10.0 (7 December 2020)
3435

Diff for: pymc3/distributions/continuous.py

+83-43
Original file line numberDiff line numberDiff line change
@@ -278,21 +278,24 @@ def logcdf(self, value):
278278
279279
Parameters
280280
----------
281-
value: numeric
281+
value: numeric or np.ndarray or theano.tensor
282282
Value(s) for which log CDF is calculated. If the log CDF for multiple
283283
values are desired the values must be provided in a numpy array or theano tensor.
284284
285285
Returns
286286
-------
287287
TensorVariable
288288
"""
289+
lower = self.lower
290+
upper = self.upper
291+
289292
return tt.switch(
290-
tt.or_(tt.lt(value, self.lower), tt.gt(value, self.upper)),
293+
tt.lt(value, lower) | tt.lt(upper, lower),
291294
-np.inf,
292295
tt.switch(
293-
tt.eq(value, self.upper),
296+
tt.lt(value, upper),
297+
tt.log(value - lower) - tt.log(upper - lower),
294298
0,
295-
tt.log(value - self.lower) - tt.log(self.upper - self.lower),
296299
),
297300
)
298301

@@ -344,7 +347,7 @@ def logcdf(self, value):
344347
345348
Parameters
346349
----------
347-
value: numeric
350+
value: numeric or np.ndarray or theano.tensor
348351
Value(s) for which log CDF is calculated. If the log CDF for multiple
349352
values are desired the values must be provided in a numpy array or theano tensor.
350353
@@ -401,7 +404,7 @@ def logcdf(self, value):
401404
402405
Parameters
403406
----------
404-
value: numeric
407+
value: numeric or np.ndarray or theano.tensor
405408
Value(s) for which log CDF is calculated. If the log CDF for multiple
406409
values are desired the values must be provided in a numpy array or theano tensor.
407410
@@ -542,7 +545,7 @@ def logcdf(self, value):
542545
543546
Parameters
544547
----------
545-
value: numeric
548+
value: numeric or np.ndarray or theano.tensor
546549
Value(s) for which log CDF is calculated. If the log CDF for multiple
547550
values are desired the values must be provided in a numpy array or theano tensor.
548551
@@ -900,7 +903,7 @@ def logcdf(self, value):
900903
901904
Parameters
902905
----------
903-
value: numeric
906+
value: numeric or np.ndarray or theano.tensor
904907
Value(s) for which log CDF is calculated. If the log CDF for multiple
905908
values are desired the values must be provided in a numpy array or theano tensor.
906909
@@ -910,10 +913,10 @@ def logcdf(self, value):
910913
"""
911914
sigma = self.sigma
912915
z = zvalue(value, mu=0, sigma=sigma)
913-
return tt.switch(
914-
tt.lt(z, -1.0),
915-
tt.log(tt.erfcx(-z / tt.sqrt(2.0))) - tt.sqr(z),
916+
return bound(
916917
tt.log1p(-tt.erfc(z / tt.sqrt(2.0))),
918+
0 <= value,
919+
0 < sigma,
917920
)
918921

919922

@@ -1106,7 +1109,7 @@ def logcdf(self, value):
11061109
11071110
Parameters
11081111
----------
1109-
value: numeric
1112+
value: numeric or np.ndarray or theano.tensor
11101113
Value(s) for which log CDF is calculated. If the log CDF for multiple
11111114
values are desired the values must be provided in a numpy array or theano tensor.
11121115
@@ -1297,20 +1300,30 @@ def logcdf(self, value):
12971300
Parameters
12981301
----------
12991302
value: numeric
1300-
Value(s) for which log CDF is calculated. If the log CDF for multiple
1301-
values are desired the values must be provided in a numpy array or theano tensor.
1303+
Value(s) for which log CDF is calculated.
13021304
13031305
Returns
13041306
-------
13051307
TensorVariable
13061308
"""
1307-
value = floatX(tt.as_tensor(value))
1308-
a = floatX(tt.as_tensor(self.alpha))
1309-
b = floatX(tt.as_tensor(self.beta))
1310-
return tt.switch(
1311-
tt.le(value, 0),
1312-
-np.inf,
1313-
tt.switch(tt.ge(value, 1), 0, tt.log(incomplete_beta(a, b, value))),
1309+
# incomplete_beta function can only handle scalar values (see #4342)
1310+
if np.ndim(value):
1311+
raise TypeError(
1312+
f"Beta.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object."
1313+
)
1314+
1315+
a = self.alpha
1316+
b = self.beta
1317+
1318+
return bound(
1319+
tt.switch(
1320+
tt.lt(value, 1),
1321+
tt.log(incomplete_beta(a, b, value)),
1322+
0,
1323+
),
1324+
0 <= value,
1325+
0 < a,
1326+
0 < b,
13141327
)
13151328

13161329
def _distr_parameters_for_repr(self):
@@ -1515,7 +1528,7 @@ def logcdf(self, value):
15151528
15161529
Parameters
15171530
----------
1518-
value: numeric
1531+
value: numeric or np.ndarray or theano.tensor
15191532
Value(s) for which log CDF is calculated. If the log CDF for multiple
15201533
values are desired the values must be provided in a numpy array or theano tensor.
15211534
@@ -1630,7 +1643,7 @@ def logcdf(self, value):
16301643
16311644
Parameters
16321645
----------
1633-
value: numeric
1646+
value: numeric or np.ndarray or theano.tensor
16341647
Value(s) for which log CDF is calculated. If the log CDF for multiple
16351648
values are desired the values must be provided in a numpy array or theano tensor.
16361649
@@ -1786,7 +1799,7 @@ def logcdf(self, value):
17861799
17871800
Parameters
17881801
----------
1789-
value: numeric
1802+
value: numeric or np.ndarray or theano.tensor
17901803
Value(s) for which log CDF is calculated. If the log CDF for multiple
17911804
values are desired the values must be provided in a numpy array or theano tensor.
17921805
@@ -1949,20 +1962,32 @@ def logcdf(self, value):
19491962
Parameters
19501963
----------
19511964
value: numeric
1952-
Value(s) for which log CDF is calculated. If the log CDF for multiple
1953-
values are desired the values must be provided in a numpy array or theano tensor.
1965+
Value(s) for which log CDF is calculated.
19541966
19551967
Returns
19561968
-------
19571969
TensorVariable
19581970
"""
1971+
# incomplete_beta function can only handle scalar values (see #4342)
1972+
if np.ndim(value):
1973+
raise TypeError(
1974+
f"StudentT.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object."
1975+
)
1976+
19591977
nu = self.nu
19601978
mu = self.mu
19611979
sigma = self.sigma
1980+
lam = self.lam
19621981
t = (value - mu) / sigma
19631982
sqrt_t2_nu = tt.sqrt(t ** 2 + nu)
19641983
x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu)
1965-
return tt.log(incomplete_beta(nu / 2.0, nu / 2.0, x))
1984+
1985+
return bound(
1986+
tt.log(incomplete_beta(nu / 2.0, nu / 2.0, x)),
1987+
0 < nu,
1988+
0 < sigma,
1989+
0 < lam,
1990+
)
19661991

19671992

19681993
class Pareto(Continuous):
@@ -2084,7 +2109,7 @@ def logcdf(self, value):
20842109
20852110
Parameters
20862111
----------
2087-
value: numeric
2112+
value: numeric or np.ndarray or theano.tensor
20882113
Value(s) for which log CDF is calculated. If the log CDF for multiple
20892114
values are desired the values must be provided in a numpy array or theano tensor.
20902115
@@ -2203,7 +2228,7 @@ def logcdf(self, value):
22032228
22042229
Parameters
22052230
----------
2206-
value: numeric
2231+
value: numeric or np.ndarray or theano.tensor
22072232
Value(s) for which log CDF is calculated. If the log CDF for multiple
22082233
values are desired the values must be provided in a numpy array or theano tensor.
22092234
@@ -2311,7 +2336,7 @@ def logcdf(self, value):
23112336
23122337
Parameters
23132338
----------
2314-
value: numeric
2339+
value: numeric or np.ndarray or theano.tensor
23152340
Value(s) for which log CDF is calculated. If the log CDF for multiple
23162341
values are desired the values must be provided in a numpy array or theano tensor.
23172342
@@ -2462,7 +2487,7 @@ def logcdf(self, value):
24622487
24632488
Parameters
24642489
----------
2465-
value: numeric
2490+
value: numeric or np.ndarray or theano.tensor
24662491
Value(s) for which log CDF is calculated. If the log CDF for multiple
24672492
values are desired the values must be provided in a numpy array or theano tensor.
24682493
@@ -2472,7 +2497,17 @@ def logcdf(self, value):
24722497
"""
24732498
alpha = self.alpha
24742499
beta = self.beta
2475-
return bound(tt.log(tt.gammainc(alpha, beta * value)), value >= 0, alpha > 0, beta > 0)
2500+
# Avoid C-assertion when the gammainc function is called with invalid values (#4340)
2501+
safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha)
2502+
safe_beta = tt.switch(tt.lt(beta, 0), 0, beta)
2503+
safe_value = tt.switch(tt.lt(value, 0), 0, value)
2504+
2505+
return bound(
2506+
tt.log(tt.gammainc(safe_alpha, safe_beta * safe_value)),
2507+
0 <= value,
2508+
0 < alpha,
2509+
0 < beta,
2510+
)
24762511

24772512
def _distr_parameters_for_repr(self):
24782513
return ["alpha", "beta"]
@@ -2626,7 +2661,7 @@ def logcdf(self, value):
26262661
26272662
Parameters
26282663
----------
2629-
value: numeric
2664+
value: numeric or np.ndarray or theano.tensor
26302665
Value(s) for which log CDF is calculated. If the log CDF for multiple
26312666
values are desired the values must be provided in a numpy array or theano tensor.
26322667
@@ -2636,11 +2671,16 @@ def logcdf(self, value):
26362671
"""
26372672
alpha = self.alpha
26382673
beta = self.beta
2674+
# Avoid C-assertion when the gammaincc function is called with invalid values (#4340)
2675+
safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha)
2676+
safe_beta = tt.switch(tt.lt(beta, 0), 0, beta)
2677+
safe_value = tt.switch(tt.lt(value, 0), 0, value)
2678+
26392679
return bound(
2640-
tt.log(tt.gammaincc(alpha, beta / value)),
2641-
value >= 0,
2642-
alpha > 0,
2643-
beta > 0,
2680+
tt.log(tt.gammaincc(safe_alpha, safe_beta / safe_value)),
2681+
0 <= value,
2682+
0 < alpha,
2683+
0 < beta,
26442684
)
26452685

26462686

@@ -2802,7 +2842,7 @@ def logcdf(self, value):
28022842
28032843
Parameters
28042844
----------
2805-
value: numeric
2845+
value: numeric or np.ndarray or theano.tensor
28062846
Value(s) for which log CDF is calculated. If the log CDF for multiple
28072847
values are desired the values must be provided in a numpy array or theano tensor.
28082848
@@ -3102,7 +3142,7 @@ def logcdf(self, value):
31023142
31033143
Parameters
31043144
----------
3105-
value: numeric
3145+
value: numeric or np.ndarray or theano.tensor
31063146
Value(s) for which log CDF is calculated. If the log CDF for multiple
31073147
values are desired the values must be provided in a numpy array or theano tensor.
31083148
@@ -3491,7 +3531,7 @@ def logcdf(self, value):
34913531
34923532
Parameters
34933533
----------
3494-
value: numeric
3534+
value: numeric or np.ndarray or theano.tensor
34953535
Value(s) for which log CDF is calculated. If the log CDF for multiple
34963536
values are desired the values must be provided in a numpy array or theano tensor.
34973537
@@ -3620,7 +3660,7 @@ def logcdf(self, value):
36203660
36213661
Parameters
36223662
----------
3623-
value: numeric
3663+
value: numeric or np.ndarray or theano.tensor
36243664
Value(s) for which log CDF is calculated. If the log CDF for multiple
36253665
values are desired the values must be provided in a numpy array or theano tensor.
36263666
@@ -3902,7 +3942,7 @@ def logcdf(self, value):
39023942
39033943
Parameters
39043944
----------
3905-
value: numeric
3945+
value: numeric or np.ndarray or theano.tensor
39063946
Value(s) for which log CDF is calculated. If the log CDF for multiple
39073947
values are desired the values must be provided in a numpy array or theano tensor.
39083948
@@ -4244,7 +4284,7 @@ def logcdf(self, value):
42444284
42454285
Parameters
42464286
----------
4247-
value: numeric
4287+
value: numeric or np.ndarray or theano.tensor
42484288
Value(s) for which log CDF is calculated. If the log CDF for multiple
42494289
values are desired the values must be provided in a numpy array or theano tensor.
42504290

0 commit comments

Comments
 (0)