From 059ef81d14242b5b812ff252fdc00dcdb3fb01fc Mon Sep 17 00:00:00 2001 From: Lucca Zenobio Date: Thu, 13 May 2021 21:55:56 -0300 Subject: [PATCH 1/8] feat: adapt student t --- pymc3/distributions/continuous.py | 69 ++++++++++-------------- pymc3/tests/test_distributions.py | 2 +- pymc3/tests/test_distributions_random.py | 25 +++++---- 3 files changed, 42 insertions(+), 54 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index a2b9bd4f47..5b77947e9d 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1811,6 +1811,19 @@ def logcdf(value, mu, sigma): ) +class StudentTRV(RandomVariable): + name = "studentt" + ndim_supp = 0 + ndims_params = [0, 0] + dtype = "floatX" + _print_name = ("StudentT", "\\operatorname{StudentT}") + + @classmethod + def rng_fn(cls, rng, nu, mu, lam, size=None): + return stats.t.rvs(nu, mu, lam ** -0.5, size=size, random_state=rng) + +studentt = StudentTRV() + class StudentT(Continuous): r""" Student's T log-likelihood. @@ -1874,45 +1887,28 @@ class StudentT(Continuous): with pm.Model(): x = pm.StudentT('x', nu=15, mu=0, lam=1/23) """ - - def __init__(self, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs): - super().__init__(*args, **kwargs) + rv_op = studentt + + + @classmethod + def dist(cls, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs): if sd is not None: sigma = sd - self.nu = nu = at.as_tensor_variable(floatX(nu)) + nu = at.as_tensor_variable(floatX(nu)) lam, sigma = get_tau_sigma(tau=lam, sigma=sigma) - self.lam = lam = at.as_tensor_variable(lam) - self.sigma = self.sd = sigma = at.as_tensor_variable(sigma) - self.mean = self.median = self.mode = self.mu = mu = at.as_tensor_variable(mu) + lam = at.as_tensor_variable(lam) + sd = sigma = at.as_tensor_variable(sigma) + median = mode = mu = at.as_tensor_variable(mu) - self.variance = at.switch((nu > 2) * 1, (1 / self.lam) * (nu / (nu - 2)), np.inf) + variance = at.switch((nu > 2) * 1, (1 / lam) * (nu / (nu - 2)), np.inf) assert_negative_support(lam, "lam (sigma)", "StudentT") assert_negative_support(nu, "nu", "StudentT") - def random(self, point=None, size=None): - """ - Draw random values from StudentT distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # nu, mu, lam = draw_values([self.nu, self.mu, self.lam], point=point, size=size) - # return generate_samples( - # stats.t.rvs, nu, loc=mu, scale=lam ** -0.5, dist_shape=self.shape, size=size - # ) + return super().dist([nu, mu, lam], **kwargs) - def logp(self, value): + + def logp(value, nu, mu, lam): """ Calculate log-probability of StudentT distribution at specified value. @@ -1926,11 +1922,7 @@ def logp(self, value): ------- TensorVariable """ - nu = self.nu - mu = self.mu - lam = self.lam - sigma = self.sigma - + lam, sigma = get_tau_sigma(tau=lam, sigma=None) return bound( gammaln((nu + 1.0) / 2.0) + 0.5 * at.log(lam / (nu * np.pi)) @@ -1944,7 +1936,7 @@ def logp(self, value): def _distr_parameters_for_repr(self): return ["nu", "mu", "lam"] - def logcdf(self, value): + def logcdf(value, nu, mu, lam): """ Compute the log of the cumulative distribution function for Student's T distribution at the specified value. @@ -1964,10 +1956,7 @@ def logcdf(self, value): f"StudentT.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) - nu = self.nu - mu = self.mu - sigma = self.sigma - lam = self.lam + lam, sigma = get_tau_sigma(tau=lam, sigma=None) t = (value - mu) / sigma sqrt_t2_nu = at.sqrt(t ** 2 + nu) x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 63466b7902..6a7bc9744c 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1302,7 +1302,7 @@ def test_lognormal(self): n_samples=5, # Just testing alternative parametrization ) - @pytest.mark.xfail(reason="Distribution not refactored yet") + # @pytest.mark.xfail(reason="Distribution not refactored yet") def test_t(self): self.check_logp( StudentT, diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 82d4f4e091..b12cdd4223 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -289,12 +289,6 @@ class TestAsymmetricLaplace(BaseTestCases.BaseTestCase): params = {"kappa": 1.0, "b": 1.0, "mu": 0.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestStudentT(BaseTestCases.BaseTestCase): - distribution = pm.StudentT - params = {"nu": 5.0, "mu": 0.0, "lam": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestChiSquared(BaseTestCases.BaseTestCase): distribution = pm.ChiSquared @@ -461,6 +455,18 @@ class TestGumbel(BaseTestDistribution): ] +class TestStudentT(BaseTestDistribution): + pymc_dist = pm.StudentT + pymc_dist_params = {"nu": 5.0, "mu": 0.0, "lam": 1.0} + expected_rv_op_params = {"nu": 5.0, "mu": 0.0, "lam": 1.0} + reference_dist_params = {"scale": 1.0, "loc": 0.0, "df": 5.0} + reference_dist = seeded_scipy_distribution_builder("t") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference" + ] + + class TestNormal(BaseTestDistribution): pymc_dist = pm.Normal pymc_dist_params = {"mu": 5.0, "sigma": 10.0} @@ -1121,13 +1127,6 @@ def ref_rand(size, kappa, b, mu): pymc3_random(pm.AsymmetricLaplace, {"b": Rplus, "kappa": Rplus, "mu": R}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") - def test_student_t(self): - def ref_rand(size, nu, mu, lam): - return st.t.rvs(nu, mu, lam ** -0.5, size=size) - - pymc3_random(pm.StudentT, {"nu": Rplus, "mu": R, "lam": Rplus}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_ex_gaussian(self): def ref_rand(size, mu, sigma, nu): From 1c6456367dce8a43023774a20ffbb26076bb5979 Mon Sep 17 00:00:00 2001 From: Lucca Zenobio Date: Thu, 13 May 2021 22:35:19 -0300 Subject: [PATCH 2/8] fix: comment --- pymc3/tests/test_distributions.py | 1 - pymc3/tests/test_distributions_random.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 6a7bc9744c..fb24998bdf 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1302,7 +1302,6 @@ def test_lognormal(self): n_samples=5, # Just testing alternative parametrization ) - # @pytest.mark.xfail(reason="Distribution not refactored yet") def test_t(self): self.check_logp( StudentT, diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index b12cdd4223..108f931bbb 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -459,7 +459,7 @@ class TestStudentT(BaseTestDistribution): pymc_dist = pm.StudentT pymc_dist_params = {"nu": 5.0, "mu": 0.0, "lam": 1.0} expected_rv_op_params = {"nu": 5.0, "mu": 0.0, "lam": 1.0} - reference_dist_params = {"scale": 1.0, "loc": 0.0, "df": 5.0} + reference_dist_params = {"df": 5.0, "loc": 0.0, "scale": 1.0} reference_dist = seeded_scipy_distribution_builder("t") tests_to_run = [ "check_pymc_params_match_rv_op", From e422550753e2244b41c039738c7680867ab751ab Mon Sep 17 00:00:00 2001 From: Lucca Zenobio Date: Fri, 14 May 2021 09:25:16 -0300 Subject: [PATCH 3/8] feat: fix cr --- pymc3/distributions/continuous.py | 8 +------- pymc3/tests/test_distributions_random.py | 4 +++- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 5b77947e9d..b064b6f753 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1814,7 +1814,7 @@ def logcdf(value, mu, sigma): class StudentTRV(RandomVariable): name = "studentt" ndim_supp = 0 - ndims_params = [0, 0] + ndims_params = [0, 0, 0] dtype = "floatX" _print_name = ("StudentT", "\\operatorname{StudentT}") @@ -1897,9 +1897,6 @@ def dist(cls, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs): nu = at.as_tensor_variable(floatX(nu)) lam, sigma = get_tau_sigma(tau=lam, sigma=sigma) lam = at.as_tensor_variable(lam) - sd = sigma = at.as_tensor_variable(sigma) - median = mode = mu = at.as_tensor_variable(mu) - variance = at.switch((nu > 2) * 1, (1 / lam) * (nu / (nu - 2)), np.inf) assert_negative_support(lam, "lam (sigma)", "StudentT") @@ -1933,9 +1930,6 @@ def logp(value, nu, mu, lam): sigma > 0, ) - def _distr_parameters_for_repr(self): - return ["nu", "mu", "lam"] - def logcdf(value, nu, mu, lam): """ Compute the log of the cumulative distribution function for Student's T distribution diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 108f931bbb..19a33edcc4 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -461,9 +461,11 @@ class TestStudentT(BaseTestDistribution): expected_rv_op_params = {"nu": 5.0, "mu": 0.0, "lam": 1.0} reference_dist_params = {"df": 5.0, "loc": 0.0, "scale": 1.0} reference_dist = seeded_scipy_distribution_builder("t") + size = 15 tests_to_run = [ "check_pymc_params_match_rv_op", - "check_pymc_draws_match_reference" + "check_pymc_draws_match_reference", + "check_rv_size" ] From 2cfd39569d822ba2b53caee7ce6bf3feb3ab749d Mon Sep 17 00:00:00 2001 From: Lucca Zenobio Date: Fri, 14 May 2021 09:37:43 -0300 Subject: [PATCH 4/8] feat: fix cr size --- pymc3/tests/test_distributions_random.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 19a33edcc4..f119891bfe 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -461,7 +461,6 @@ class TestStudentT(BaseTestDistribution): expected_rv_op_params = {"nu": 5.0, "mu": 0.0, "lam": 1.0} reference_dist_params = {"df": 5.0, "loc": 0.0, "scale": 1.0} reference_dist = seeded_scipy_distribution_builder("t") - size = 15 tests_to_run = [ "check_pymc_params_match_rv_op", "check_pymc_draws_match_reference", From 24dcb05e4f42dc230658753107ab0d5e56be13fb Mon Sep 17 00:00:00 2001 From: Lucca Zenobio Date: Fri, 14 May 2021 09:42:56 -0300 Subject: [PATCH 5/8] feat: fix linter --- pymc3/tests/test_distributions_random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index f119891bfe..0b7120f85e 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -464,7 +464,7 @@ class TestStudentT(BaseTestDistribution): tests_to_run = [ "check_pymc_params_match_rv_op", "check_pymc_draws_match_reference", - "check_rv_size" + "check_rv_size", ] From 8e0b9affed8a3ddf3a1142d6f5aa92f3bd4e427a Mon Sep 17 00:00:00 2001 From: Lucca Zenobio Date: Fri, 14 May 2021 09:57:01 -0300 Subject: [PATCH 6/8] fix: posterior tests --- pymc3/tests/test_posteriors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pymc3/tests/test_posteriors.py b/pymc3/tests/test_posteriors.py index 91ecd05f1f..dcb346ca20 100644 --- a/pymc3/tests/test_posteriors.py +++ b/pymc3/tests/test_posteriors.py @@ -76,7 +76,6 @@ class TestNUTSBetaBinomial(sf.NutsFixture, sf.BetaBinomialFixture): min_n_eff = 400 -@pytest.mark.xfail(reason="StudentT not refactored for v4") class TestNUTSStudentT(sf.NutsFixture, sf.StudentTFixture): n_samples = 10000 tune = 1000 @@ -98,7 +97,7 @@ class TestNUTSNormalLong(sf.NutsFixture, sf.NormalFixture): atol = 0.001 -@pytest.mark.xfail(reason="StudentT not refactored for v4") +@pytest.mark.xfail(reason="LKJCholeskyCov not refactored for v4") class TestNUTSLKJCholeskyCov(sf.NutsFixture, sf.LKJCholeskyCovFixture): n_samples = 2000 tune = 1000 From 3c1d501a6feb6870090e3ad0a97d62c2a54a5f75 Mon Sep 17 00:00:00 2001 From: Lucca Zenobio Date: Fri, 14 May 2021 10:04:57 -0300 Subject: [PATCH 7/8] linter --- pymc3/distributions/continuous.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index b064b6f753..492590fde8 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1822,8 +1822,10 @@ class StudentTRV(RandomVariable): def rng_fn(cls, rng, nu, mu, lam, size=None): return stats.t.rvs(nu, mu, lam ** -0.5, size=size, random_state=rng) + studentt = StudentTRV() + class StudentT(Continuous): r""" Student's T log-likelihood. @@ -1888,8 +1890,8 @@ class StudentT(Continuous): x = pm.StudentT('x', nu=15, mu=0, lam=1/23) """ rv_op = studentt - - + + @classmethod def dist(cls, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs): if sd is not None: @@ -1904,7 +1906,7 @@ def dist(cls, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs): return super().dist([nu, mu, lam], **kwargs) - + def logp(value, nu, mu, lam): """ Calculate log-probability of StudentT distribution at specified value. From 12ffee0f0838a346673cb227f6fd3c55e029fa97 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 14 May 2021 15:33:33 +0200 Subject: [PATCH 8/8] Change default parameterization in terms of sigma and tweak tests --- pymc3/distributions/continuous.py | 22 ++++++++++------------ pymc3/tests/test_distributions.py | 16 +++++++++++++++- pymc3/tests/test_distributions_random.py | 16 +++++++++++++--- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 492590fde8..a4e74048f6 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1819,8 +1819,8 @@ class StudentTRV(RandomVariable): _print_name = ("StudentT", "\\operatorname{StudentT}") @classmethod - def rng_fn(cls, rng, nu, mu, lam, size=None): - return stats.t.rvs(nu, mu, lam ** -0.5, size=size, random_state=rng) + def rng_fn(cls, rng, nu, mu, sigma, size=None): + return stats.t.rvs(nu, mu, sigma, size=size, random_state=rng) studentt = StudentTRV() @@ -1891,23 +1891,20 @@ class StudentT(Continuous): """ rv_op = studentt - @classmethod def dist(cls, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs): if sd is not None: sigma = sd nu = at.as_tensor_variable(floatX(nu)) lam, sigma = get_tau_sigma(tau=lam, sigma=sigma) - lam = at.as_tensor_variable(lam) - variance = at.switch((nu > 2) * 1, (1 / lam) * (nu / (nu - 2)), np.inf) + sigma = at.as_tensor_variable(sigma) - assert_negative_support(lam, "lam (sigma)", "StudentT") + assert_negative_support(sigma, "sigma (lam)", "StudentT") assert_negative_support(nu, "nu", "StudentT") - return super().dist([nu, mu, lam], **kwargs) + return super().dist([nu, mu, sigma], **kwargs) - - def logp(value, nu, mu, lam): + def logp(value, nu, mu, sigma): """ Calculate log-probability of StudentT distribution at specified value. @@ -1921,7 +1918,7 @@ def logp(value, nu, mu, lam): ------- TensorVariable """ - lam, sigma = get_tau_sigma(tau=lam, sigma=None) + lam, sigma = get_tau_sigma(sigma=sigma) return bound( gammaln((nu + 1.0) / 2.0) + 0.5 * at.log(lam / (nu * np.pi)) @@ -1932,7 +1929,7 @@ def logp(value, nu, mu, lam): sigma > 0, ) - def logcdf(value, nu, mu, lam): + def logcdf(value, nu, mu, sigma): """ Compute the log of the cumulative distribution function for Student's T distribution at the specified value. @@ -1952,7 +1949,8 @@ def logcdf(value, nu, mu, lam): f"StudentT.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) - lam, sigma = get_tau_sigma(tau=lam, sigma=None) + lam, sigma = get_tau_sigma(sigma=sigma) + t = (value - mu) / sigma sqrt_t2_nu = at.sqrt(t ** 2 + nu) x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index fb24998bdf..231772cb03 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1309,12 +1309,26 @@ def test_t(self): {"nu": Rplus, "mu": R, "lam": Rplus}, lambda value, nu, mu, lam: sp.t.logpdf(value, nu, mu, lam ** -0.5), ) + self.check_logp( + StudentT, + R, + {"nu": Rplus, "mu": R, "sigma": Rplus}, + lambda value, nu, mu, sigma: sp.t.logpdf(value, nu, mu, sigma), + n_samples=5, # Just testing alternative parametrization + ) self.check_logcdf( StudentT, R, {"nu": Rplus, "mu": R, "lam": Rplus}, lambda value, nu, mu, lam: sp.t.logcdf(value, nu, mu, lam ** -0.5), - n_samples=10, + n_samples=10, # relies on slow incomplete beta + ) + self.check_logcdf( + StudentT, + R, + {"nu": Rplus, "mu": R, "sigma": Rplus}, + lambda value, nu, mu, sigma: sp.t.logcdf(value, nu, mu, sigma), + n_samples=5, # Just testing alternative parametrization ) def test_cauchy(self): diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 0b7120f85e..6158f76d24 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -457,9 +457,9 @@ class TestGumbel(BaseTestDistribution): class TestStudentT(BaseTestDistribution): pymc_dist = pm.StudentT - pymc_dist_params = {"nu": 5.0, "mu": 0.0, "lam": 1.0} - expected_rv_op_params = {"nu": 5.0, "mu": 0.0, "lam": 1.0} - reference_dist_params = {"df": 5.0, "loc": 0.0, "scale": 1.0} + pymc_dist_params = {"nu": 5.0, "mu": -1.0, "sigma": 2.0} + expected_rv_op_params = {"nu": 5.0, "mu": -1.0, "sigma": 2.0} + reference_dist_params = {"df": 5.0, "loc": -1.0, "scale": 2.0} reference_dist = seeded_scipy_distribution_builder("t") tests_to_run = [ "check_pymc_params_match_rv_op", @@ -468,6 +468,16 @@ class TestStudentT(BaseTestDistribution): ] +class TestStudentTLam(BaseTestDistribution): + pymc_dist = pm.StudentT + lam, sigma = get_tau_sigma(tau=2.0) + pymc_dist_params = {"nu": 5.0, "mu": -1.0, "lam": lam} + expected_rv_op_params = {"nu": 5.0, "mu": -1.0, "lam": sigma} + reference_dist_params = {"df": 5.0, "loc": -1.0, "scale": sigma} + reference_dist = seeded_scipy_distribution_builder("t") + tests_to_run = ["check_pymc_params_match_rv_op"] + + class TestNormal(BaseTestDistribution): pymc_dist = pm.Normal pymc_dist_params = {"mu": 5.0, "sigma": 10.0}