Skip to content

Commit 35d37ce

Browse files
authored
Merge pull request #3064 from stan-dev/revert-3055-hess-sparse
Revert "Allow Hessian functors to return Hessian as compressed sparse matrix"
2 parents 1830097 + 39bf2ae commit 35d37ce

File tree

5 files changed

+36
-143
lines changed

5 files changed

+36
-143
lines changed

stan/math/fwd/functor/hessian.hpp

Lines changed: 13 additions & 49 deletions
Original file line number · Diff line number · Diff line change
@@ -2,7 +2,6 @@
22
#define STAN_MATH_FWD_FUNCTOR_HESSIAN_HPP
33

44
#include <stan/math/fwd/core.hpp>
5-
#include <stan/math/fwd/fun/value_of.hpp>
65
#include <stan/math/prim/fun/Eigen.hpp>
76

87
namespace stan {
@@ -15,9 +14,6 @@ namespace math {
1514
* mixed definition, which is faster for Hessians, is that this
1615
* version is itself differentiable.
1716
*
18-
* Instead of returning the full symmetric Hessian, we return the
19-
* lower-triangular only as a column-major compressed sparse matrix.
20-
*
2117
* <p>The functor must implement
2218
*
2319
* <code>
@@ -39,27 +35,23 @@ namespace math {
3935
* @param[in] x Argument to function
4036
* @param[out] fx Function applied to argument
4137
* @param[out] grad gradient of function at argument
42-
* @param[out] H Hessian of function at argument, as a lower-triangular
43-
* compressed sparse matrix
38+
* @param[out] H Hessian of function at argument
4439
*/
4540
template <typename T, typename F>
4641
void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
4742
Eigen::Matrix<T, Eigen::Dynamic, 1>& grad,
48-
Eigen::SparseMatrix<T>& H) {
49-
int d = x.size();
50-
if (d == 0) {
51-
fx = value_of(f(x));
43+
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>& H) {
44+
H.resize(x.size(), x.size());
45+
grad.resize(x.size());
46+
// size 0 separate because nothing to loop over in main body
47+
if (x.size() == 0) {
48+
fx = f(x);
5249
return;
5350
}
54-
55-
H.resize(d, d);
56-
H.reserve(Eigen::VectorXi::LinSpaced(d, 1, d).reverse());
57-
grad.resize(d);
58-
59-
Eigen::Matrix<fvar<fvar<T> >, Eigen::Dynamic, 1> x_fvar(d);
60-
for (int i = 0; i < d; ++i) {
61-
for (int j = i; j < d; ++j) {
62-
for (int k = 0; k < d; ++k) {
51+
Eigen::Matrix<fvar<fvar<T> >, Eigen::Dynamic, 1> x_fvar(x.size());
52+
for (int i = 0; i < x.size(); ++i) {
53+
for (int j = i; j < x.size(); ++j) {
54+
for (int k = 0; k < x.size(); ++k) {
6355
x_fvar(k) = fvar<fvar<T> >(fvar<T>(x(k), j == k), fvar<T>(i == k, 0));
6456
}
6557
fvar<fvar<T> > fx_fvar = f(x_fvar);
@@ -69,38 +61,10 @@ void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
6961
if (i == j) {
7062
grad(i) = fx_fvar.d_.val_;
7163
}
72-
H.insert(j, i) = fx_fvar.d_.d_;
64+
H(i, j) = fx_fvar.d_.d_;
65+
H(j, i) = H(i, j);
7366
}
7467
}
75-
H.makeCompressed();
76-
}
77-
78-
/**
79-
* Calculate the value, the gradient, and the Hessian,
80-
* of the specified function at the specified argument in
81-
* time O(N^3) time and O(N^2) space. The advantage over the
82-
* mixed definition, which is faster for Hessians, is that this
83-
* version is itself differentiable.
84-
*
85-
* Overload for returning the Hessian as a symmetric dense matrix.
86-
*
87-
* @tparam T type of elements in the vector and matrix
88-
* @tparam F type of function
89-
* @param[in] f Function
90-
* @param[in] x Argument to function
91-
* @param[out] fx Function applied to argument
92-
* @param[out] grad gradient of function at argument
93-
* @param[out] H Hessian of function at argument, as a symmetric matrix
94-
*/
95-
template <typename T, typename F>
96-
void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
97-
Eigen::Matrix<T, Eigen::Dynamic, 1>& grad,
98-
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>& H) {
99-
Eigen::SparseMatrix<T> hess_sparse;
100-
hessian(f, x, fx, grad, hess_sparse);
101-
102-
H = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>(hess_sparse)
103-
.template selfadjointView<Eigen::Lower>();
10468
}
10569

10670
} // namespace math

stan/math/mix/functor/hessian.hpp

Lines changed: 13 additions & 44 deletions
Original file line number · Diff line number · Diff line change
@@ -1,11 +1,9 @@
11
#ifndef STAN_MATH_MIX_FUNCTOR_HESSIAN_HPP
22
#define STAN_MATH_MIX_FUNCTOR_HESSIAN_HPP
33

4-
#include <stan/math/prim/fun/Eigen.hpp>
54
#include <stan/math/fwd/core.hpp>
6-
#include <stan/math/fwd/fun/value_of_rec.hpp>
5+
#include <stan/math/prim/fun/Eigen.hpp>
76
#include <stan/math/rev/core.hpp>
8-
#include <stan/math/rev/fun/value_of_rec.hpp>
97
#include <stdexcept>
108

119
namespace stan {
@@ -16,9 +14,6 @@ namespace math {
1614
* of the specified function at the specified argument in
1715
* O(N^2) time and O(N^2) space.
1816
*
19-
* Instead of returning the full symmetric Hessian, we return the
20-
* lower-triangular only as a column-major compressed sparse matrix.
21-
*
2217
* <p>The functor must implement
2318
*
2419
* <code>
@@ -41,22 +36,20 @@ namespace math {
4136
* @param[in] x Argument to function
4237
* @param[out] fx Function applied to argument
4338
* @param[out] grad gradient of function at argument
44-
* @param[out] H Hessian of function at argument, as a lower-triangular
45-
* compressed sparse matrix
39+
* @param[out] H Hessian of function at argument
4640
*/
4741
template <typename F>
48-
void hessian(const F& f, const Eigen::VectorXd& x, double& fx,
49-
Eigen::VectorXd& grad, Eigen::SparseMatrix<double>& H) {
50-
int d = x.size();
51-
if (d == 0) {
52-
fx = value_of_rec(f(x));
42+
void hessian(const F& f, const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
43+
double& fx, Eigen::Matrix<double, Eigen::Dynamic, 1>& grad,
44+
Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& H) {
45+
H.resize(x.size(), x.size());
46+
grad.resize(x.size());
47+
48+
// need to compute fx even with size = 0
49+
if (x.size() == 0) {
50+
fx = f(x);
5351
return;
5452
}
55-
56-
grad.resize(d);
57-
H.resize(d, d);
58-
H.reserve(Eigen::VectorXi::LinSpaced(d, 1, d).reverse());
59-
6053
for (int i = 0; i < x.size(); ++i) {
6154
// Run nested autodiff in this scope
6255
nested_rev_autodiff nested;
@@ -71,34 +64,10 @@ void hessian(const F& f, const Eigen::VectorXd& x, double& fx,
7164
fx = fx_fvar.val_.val();
7265
}
7366
stan::math::grad(fx_fvar.d_.vi_);
74-
for (int j = i; j < x.size(); ++j) {
75-
H.insert(j, i) = x_fvar(j).val_.adj();
67+
for (int j = 0; j < x.size(); ++j) {
68+
H(i, j) = x_fvar(j).val_.adj();
7669
}
7770
}
78-
H.makeCompressed();
79-
}
80-
81-
/**
82-
* Calculate the value, the gradient, and the Hessian,
83-
* of the specified function at the specified argument in
84-
* O(N^2) time and O(N^2) space.
85-
*
86-
* Overload for returning the Hessian as a symmetric dense matrix.
87-
*
88-
* @tparam F Type of function
89-
* @param[in] f Function
90-
* @param[in] x Argument to function
91-
* @param[out] fx Function applied to argument
92-
* @param[out] grad gradient of function at argument
93-
* @param[out] H Hessian of function at argument, as a symmetric matrix
94-
*/
95-
template <typename F>
96-
void hessian(const F& f, const Eigen::VectorXd& x, double& fx,
97-
Eigen::VectorXd& grad, Eigen::MatrixXd& H) {
98-
Eigen::SparseMatrix<double> hess_sparse;
99-
hessian(f, x, fx, grad, hess_sparse);
100-
101-
H = Eigen::MatrixXd(hess_sparse).selfadjointView<Eigen::Lower>();
10271
}
10372

10473
} // namespace math

stan/math/mix/functor/hessian_times_vector.hpp

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -42,9 +42,9 @@ void hessian_times_vector(const F& f,
4242
Eigen::Matrix<T, Eigen::Dynamic, 1>& Hv) {
4343
using Eigen::Matrix;
4444
Matrix<T, Eigen::Dynamic, 1> grad;
45-
Eigen::SparseMatrix<T> H;
45+
Matrix<T, Eigen::Dynamic, Eigen::Dynamic> H;
4646
hessian(f, x, fx, grad, H);
47-
Hv = H.template selfadjointView<Eigen::Lower>() * v;
47+
Hv = H * v;
4848
}
4949

5050
} // namespace math

stan/math/rev/functor/finite_diff_hessian_auto.hpp

Lines changed: 8 additions & 47 deletions
Original file line number · Diff line number · Diff line change
@@ -3,7 +3,6 @@
33

44
#include <stan/math/rev/meta.hpp>
55
#include <stan/math/rev/core.hpp>
6-
#include <stan/math/rev/fun/value_of.hpp>
76
#include <stan/math/prim/fun/Eigen.hpp>
87
#include <stan/math/rev/functor.hpp>
98
#include <stan/math/prim/fun/finite_diff_stepsize.hpp>
@@ -18,15 +17,10 @@ namespace internal {
1817
* automatically setting the stepsize between the function evaluations
1918
* along a dimension.
2019
*
21-
* Instead of returning the full symmetric Hessian, we return the
22-
* lower-triangular only as a column-major compressed sparse matrix.
23-
*
2420
* <p>The functor must implement
2521
*
2622
* <code>
27-
* var
28-
* operator()(const
29-
* Eigen::Matrix<var, Eigen::Dynamic, 1>&)
23+
* double operator()(const Eigen::VectorXd&)
3024
* </code>
3125
*
3226
* <p>For details of the algorithm, see
@@ -43,24 +37,18 @@ namespace internal {
4337
* @param[in] x Argument to function
4438
* @param[out] fx Function applied to argument
4539
* @param[out] grad_fx Gradient of function at argument
46-
* @param[out] hess_fx Hessian of function at argument, as a lower-triangular
47-
* compressed sparse matrix
40+
* @param[out] hess_fx Hessian of function at argument
4841
*/
4942
template <typename F>
5043
void finite_diff_hessian_auto(const F& f, const Eigen::VectorXd& x, double& fx,
5144
Eigen::VectorXd& grad_fx,
52-
Eigen::SparseMatrix<double>& hess_fx) {
45+
Eigen::MatrixXd& hess_fx) {
5346
int d = x.size();
54-
if (d == 0) {
55-
fx = value_of(f(x));
56-
return;
57-
}
58-
59-
gradient(f, x, fx, grad_fx);
6047

6148
Eigen::VectorXd x_temp(x);
6249
hess_fx.resize(d, d);
63-
hess_fx.reserve(Eigen::VectorXi::LinSpaced(d, 1, d).reverse());
50+
51+
gradient(f, x, fx, grad_fx);
6452

6553
std::vector<Eigen::VectorXd> g_plus(d);
6654
std::vector<Eigen::VectorXd> g_minus(d);
@@ -86,39 +74,12 @@ void finite_diff_hessian_auto(const F& f, const Eigen::VectorXd& x, double& fx,
8674
// approximate the hessian as a finite difference of gradients
8775
for (int i = 0; i < d; ++i) {
8876
for (int j = i; j < d; ++j) {
89-
hess_fx.insert(j, i)
90-
= (g_plus[j](i) - g_minus[j](i)) / (4 * epsilons[j])
91-
+ (g_plus[i](j) - g_minus[i](j)) / (4 * epsilons[i]);
77+
hess_fx(j, i) = (g_plus[j](i) - g_minus[j](i)) / (4 * epsilons[j])
78+
+ (g_plus[i](j) - g_minus[i](j)) / (4 * epsilons[i]);
79+
hess_fx(i, j) = hess_fx(j, i);
9280
}
9381
}
94-
hess_fx.makeCompressed();
95-
}
96-
97-
/**
98-
* Calculate the value and the Hessian of the specified function at
99-
* the specified argument using first-order finite difference of gradients,
100-
* automatically setting the stepsize between the function evaluations
101-
* along a dimension.
102-
*
103-
* Overload for returning the Hessian as a symmetric dense matrix.
104-
*
105-
* @tparam F Type of function
106-
* @param[in] f Function
107-
* @param[in] x Argument to function
108-
* @param[out] fx Function applied to argument
109-
* @param[out] grad_fx Gradient of function at argument
110-
* @param[out] hess_fx Hessian of function at argument, as a symmetric matrix
111-
*/
112-
template <typename F>
113-
void finite_diff_hessian_auto(const F& f, const Eigen::VectorXd& x, double& fx,
114-
Eigen::VectorXd& grad_fx,
115-
Eigen::MatrixXd& hess_fx) {
116-
Eigen::SparseMatrix<double> hess_sparse;
117-
finite_diff_hessian_auto(f, x, fx, grad_fx, hess_sparse);
118-
119-
hess_fx = Eigen::MatrixXd(hess_sparse).selfadjointView<Eigen::Lower>();
12082
}
121-
12283
} // namespace internal
12384
} // namespace math
12485
} // namespace stan

test/unit/math/rev/functor/finite_diff_hessian_auto_test.cpp

Lines changed: 0 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -56,7 +56,6 @@ struct exp_full {
5656
struct one_arg {
5757
template <typename T>
5858
inline T operator()(const Matrix<T, Dynamic, 1>& x) const {
59-
using stan::math::pow;
6059
return pow(x(0), 3);
6160
}
6261
};

0 commit comments

Comments (0)