Skip to content

Function to optimize prior under constraints #5231

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 54 commits into main from optim-prior
Jan 4, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
8ca3ded
Replace print statement by AttributeError
AlexAndorra Nov 30, 2021
9dc0096
pre-commit formatting
AlexAndorra Nov 30, 2021
9675e4f
Mention in release notes
AlexAndorra Nov 30, 2021
d132364
Handle 1-param and 3-param distributions
AlexAndorra Dec 1, 2021
6f9ccd4
Update tests
AlexAndorra Dec 1, 2021
fea6643
Fix some wording
AlexAndorra Dec 1, 2021
524a900
pre-commit formatting
AlexAndorra Dec 3, 2021
91174b9
Only raise UserWarning when mass_in_interval not optimal
AlexAndorra Dec 3, 2021
29741f1
Raise NotImplementedError for non-scalar params
AlexAndorra Dec 3, 2021
1ad4297
Remove pipe operator for old python versions
AlexAndorra Dec 3, 2021
a708e6d
Update tests
AlexAndorra Dec 3, 2021
e1c5125
Add test with discrete distrib & wrap in pytest.warns(None)
AlexAndorra Dec 7, 2021
bc9b543
Remove pipe operator for good
AlexAndorra Dec 7, 2021
18ad975
Fix TypeError in dist_params
AlexAndorra Dec 7, 2021
e92d6d8
Relax tolerance for tests
AlexAndorra Dec 7, 2021
94b406b
Force float64 config in find_optim_prior
AlexAndorra Dec 14, 2021
76dbb1f
Rename file name to func_utils.py
AlexAndorra Dec 14, 2021
53bfc00
Replace print statement by AttributeError
AlexAndorra Nov 30, 2021
77a0bb1
pre-commit formatting
AlexAndorra Nov 30, 2021
fd5f498
Mention in release notes
AlexAndorra Nov 30, 2021
171a4aa
Handle 1-param and 3-param distributions
AlexAndorra Dec 1, 2021
36b95cb
Update tests
AlexAndorra Dec 1, 2021
55138d9
Fix some wording
AlexAndorra Dec 1, 2021
4bed2cd
pre-commit formatting
AlexAndorra Dec 3, 2021
02d117b
Only raise UserWarning when mass_in_interval not optimal
AlexAndorra Dec 3, 2021
7742571
Raise NotImplementedError for non-scalar params
AlexAndorra Dec 3, 2021
8a6e0e7
Remove pipe operator for old python versions
AlexAndorra Dec 3, 2021
602391b
Update tests
AlexAndorra Dec 3, 2021
9bb14a3
Add test with discrete distrib & wrap in pytest.warns(None)
AlexAndorra Dec 7, 2021
ab0ef0f
Remove pipe operator for good
AlexAndorra Dec 7, 2021
58f5d56
Fix TypeError in dist_params
AlexAndorra Dec 7, 2021
a6c7f0d
Relax tolerance for tests
AlexAndorra Dec 7, 2021
c9c24d6
Force float64 config in find_optim_prior
AlexAndorra Dec 14, 2021
c75f8c9
Rename file name to func_utils.py
AlexAndorra Dec 14, 2021
3ffd7ff
Change optimization error function and refactor tests
ricardoV94 Dec 16, 2021
a1a6bdf
Use aesaraf.compile_pymc
ricardoV94 Dec 21, 2021
7cd0e55
Merge branch 'optim-prior' of https://github.com/pymc-devs/pymc into …
AlexAndorra Dec 22, 2021
1d868fa
Add and test AssertionError for mass value
AlexAndorra Dec 22, 2021
063bc96
Fix type error in warning message
AlexAndorra Dec 23, 2021
cb7908c
Split up Poisson test
AlexAndorra Dec 24, 2021
16ed438
Use scipy default for Exponential and reactivate tests
AlexAndorra Dec 24, 2021
1b84e18
Refactor Poisson tests
AlexAndorra Dec 24, 2021
6ea7861
Reduce Poisson test tol to 1% for float32
AlexAndorra Dec 25, 2021
d63b652
Remove Exponential logic
AlexAndorra Dec 27, 2021
37e6251
Rename function
AlexAndorra Dec 27, 2021
b912ac6
Refactor test functions names
AlexAndorra Dec 28, 2021
d4bce39
Use more precise exception for gradient
AlexAndorra Dec 30, 2021
9a51289
Don't catch TypeError
AlexAndorra Jan 3, 2022
90a88ff
Merge branch 'main' into optim-prior
AlexAndorra Jan 3, 2022
8b9ae6e
Remove specific Poisson test
AlexAndorra Jan 3, 2022
d53154a
Remove typo from old Poisson test
AlexAndorra Jan 3, 2022
1f42835
Put tests for constrained priors into their own file
AlexAndorra Jan 3, 2022
bad236c
Add code examples in docstrings
AlexAndorra Jan 4, 2022
d89e375
Merge branch 'main' into optim-prior
AlexAndorra Jan 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 28 additions & 22 deletions pymc/find_optim_prior.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings

from typing import Dict, Optional

Expand Down Expand Up @@ -68,18 +69,11 @@ def find_optim_prior(
The optimized distribution parameters as a dictionary with the parameters'
name as key and the optimized value as value.
"""
if len(init_guess) > 2:
if (fixed_params is None) or (len(fixed_params) < (len(pm_dist.rv_op.ndims_params) - 2)):
raise NotImplementedError(
"This function can only optimize two parameters. "
f"{pm_dist} has {len(pm_dist.rv_op.ndims_params)} parameters. "
f"You need to fix {len(pm_dist.rv_op.ndims_params) - 2} parameters in the "
"`fixed_params` dictionary."
)
elif (len(init_guess) < 2) and (len(init_guess) < len(pm_dist.rv_op.ndims_params)):
raise ValueError(
f"{pm_dist} has {len(pm_dist.rv_op.ndims_params)} parameters, but you provided only "
f"{len(init_guess)} initial guess. You need to provide 2."
# exit when any parameter is not scalar:
if np.any(np.asarray(pm_dist.rv_op.ndims_params) != 0):
raise NotImplementedError(
"`pm.find_optim_prior` does not work with non-scalar parameters yet.\n"
"Feel free to open a pull request on PyMC repo if you really need this feature."
)

dist_params = aet.vector("dist_params")
Expand All @@ -99,12 +93,11 @@ def find_optim_prior(
except AttributeError:
raise AttributeError(
f"You cannot use `find_optim_prior` with {pm_dist} -- it doesn't have a logcdf "
"method yet. Open an issue or, even better, a pull request on PyMC repo if you really "
"method yet.\nOpen an issue or, even better, a pull request on PyMC repo if you really "
"need it."
)

alpha = 1 - mass
out = [logcdf_lower - np.log(alpha / 2), logcdf_upper - np.log(1 - alpha / 2)]
out = pm.math.logdiffexp(logcdf_upper, logcdf_lower) - np.log(mass)
logcdf = aesara.function([dist_params, lower_, upper_], out)

try:
Expand All @@ -119,11 +112,24 @@ def find_optim_prior(
if not opt.success:
raise ValueError("Optimization of parameters failed.")

# save optimal parameters
opt_params = {
param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
}
if fixed_params is not None:
return {
param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
} | fixed_params
else:
return {
param_name: param_value for param_name, param_value in zip(init_guess.keys(), opt.x)
}
opt_params.update(fixed_params)

# check mass in interval is not too far from `mass`
opt_dist = pm_dist.dist(**opt_params)
mass_in_interval = (
pm.math.exp(pm.logcdf(opt_dist, upper)) - pm.math.exp(pm.logcdf(opt_dist, lower))
).eval()
if (np.abs(mass_in_interval - mass)) >= 0.01:
warnings.warn(
f"Final optimization has {mass_in_interval * 100:.0f}% of probability mass between "
f"{lower} and {upper} instead of the requested {mass * 100:.0f}%.\n"
"You may need to use a more flexible distribution, change the fixed parameters in the "
"`fixed_params` dictionary, or provide better initial guesses."
)

return opt_params
61 changes: 44 additions & 17 deletions pymc/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,25 +147,23 @@ def fn(a=UNSET):
def test_find_optim_prior():
MASS = 0.95

# normal case
# Gamma, normal case
opt_params = pm.find_optim_prior(
pm.Gamma, lower=0.1, upper=0.4, mass=MASS, init_guess={"alpha": 1, "beta": 10}
)
np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([8.47481597, 37.65435601]))
np.testing.assert_allclose(
list(opt_params.values()), np.array([8.506023352404027, 37.59626616198404])
)

# normal case, other distribution
# Normal, normal case
opt_params = pm.find_optim_prior(
pm.Normal, lower=155, upper=180, mass=MASS, init_guess={"mu": 170, "sigma": 3}
)
np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([167.5000001, 6.37766828]))

# 1-param case
opt_params = pm.find_optim_prior(
pm.Exponential, lower=0.1, upper=0.4, mass=MASS, init_guess={"lam": 10}
np.testing.assert_allclose(
list(opt_params.values()), np.array([170.76059047372624, 5.542895384602784])
)
np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([0.79929324]))

# 3-param case
# Student, works as expected
opt_params = pm.find_optim_prior(
pm.StudentT,
lower=0.1,
Expand All @@ -174,9 +172,36 @@ def test_find_optim_prior():
init_guess={"mu": 170, "sigma": 3},
fixed_params={"nu": 7},
)
np.testing.assert_allclose(np.asarray(opt_params.values()), np.array([0.25, 0.06343503]))
assert "nu" in opt_params
np.testing.assert_allclose(
list(opt_params.values()), np.array([0.24995405785756986, 0.06343501657095188, 7])
)

with pytest.raises(ValueError, match="parameters, but you provided only"):
# Student not deterministic but without warning
with pytest.warns(None) as record:
pm.find_optim_prior(
pm.StudentT,
lower=0,
upper=1,
mass=MASS,
init_guess={"mu": 5, "sigma": 2, "nu": 7},
)
assert len(record) == 0

# Exponential without warning
with pytest.warns(None) as record:
opt_params = pm.find_optim_prior(
pm.Exponential, lower=0, upper=1, mass=MASS, init_guess={"lam": 1}
)
assert len(record) == 0
np.testing.assert_allclose(list(opt_params.values()), np.array([2.9957322673241604]))

# Exponential too constraining
with pytest.warns(UserWarning, match="instead of the requested 95%"):
pm.find_optim_prior(pm.Exponential, lower=0.1, upper=1, mass=MASS, init_guess={"lam": 1})

# Gamma too constraining
with pytest.warns(UserWarning, match="instead of the requested 95%"):
pm.find_optim_prior(
pm.Gamma,
lower=0.1,
Expand All @@ -186,16 +211,18 @@ def test_find_optim_prior():
fixed_params={"beta": 10},
)

# missing param
with pytest.raises(TypeError, match="required positional argument"):
pm.find_optim_prior(
pm.StudentT, lower=0.1, upper=0.4, mass=MASS, init_guess={"mu": 170, "sigma": 3}
)

with pytest.raises(NotImplementedError, match="This function can only optimize two parameters"):
# non-scalar params
with pytest.raises(NotImplementedError, match="does not work with non-scalar parameters yet"):
pm.find_optim_prior(
pm.StudentT,
lower=0.1,
upper=0.4,
pm.MvNormal,
lower=0,
upper=1,
mass=MASS,
init_guess={"mu": 170, "sigma": 3, "nu": 7},
init_guess={"mu": 5, "cov": np.asarray([[1, 0.2], [0.2, 1]])},
)