Skip to content

Commit ee65daa

Browse files
authored
Merge pull request #3055 from stan-dev/hess-sparse
Allow Hessian functors to return Hessian as compressed sparse matrix
2 parents b2b2ad8 + 78c0533 commit ee65daa

File tree

5 files changed

+143
-36
lines changed

5 files changed

+143
-36
lines changed

stan/math/fwd/functor/hessian.hpp

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define STAN_MATH_FWD_FUNCTOR_HESSIAN_HPP
33

44
#include <stan/math/fwd/core.hpp>
5+
#include <stan/math/fwd/fun/value_of.hpp>
56
#include <stan/math/prim/fun/Eigen.hpp>
67

78
namespace stan {
@@ -14,6 +15,9 @@ namespace math {
1415
* mixed definition, which is faster for Hessians, is that this
1516
* version is itself differentiable.
1617
*
18+
* Instead of returning the full symmetric Hessian, we return the
19+
* lower-triangular only as a column-major compressed sparse matrix.
20+
*
1721
* <p>The functor must implement
1822
*
1923
* <code>
@@ -35,23 +39,27 @@ namespace math {
3539
* @param[in] x Argument to function
3640
* @param[out] fx Function applied to argument
3741
* @param[out] grad gradient of function at argument
38-
* @param[out] H Hessian of function at argument
42+
* @param[out] H Hessian of function at argument, as a lower-triangular
43+
* compressed sparse matrix
3944
*/
4045
template <typename T, typename F>
4146
void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
4247
Eigen::Matrix<T, Eigen::Dynamic, 1>& grad,
43-
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>& H) {
44-
H.resize(x.size(), x.size());
45-
grad.resize(x.size());
46-
// size 0 separate because nothing to loop over in main body
47-
if (x.size() == 0) {
48-
fx = f(x);
48+
Eigen::SparseMatrix<T>& H) {
49+
int d = x.size();
50+
if (d == 0) {
51+
fx = value_of(f(x));
4952
return;
5053
}
51-
Eigen::Matrix<fvar<fvar<T> >, Eigen::Dynamic, 1> x_fvar(x.size());
52-
for (int i = 0; i < x.size(); ++i) {
53-
for (int j = i; j < x.size(); ++j) {
54-
for (int k = 0; k < x.size(); ++k) {
54+
55+
H.resize(d, d);
56+
H.reserve(Eigen::VectorXi::LinSpaced(d, 1, d).reverse());
57+
grad.resize(d);
58+
59+
Eigen::Matrix<fvar<fvar<T> >, Eigen::Dynamic, 1> x_fvar(d);
60+
for (int i = 0; i < d; ++i) {
61+
for (int j = i; j < d; ++j) {
62+
for (int k = 0; k < d; ++k) {
5563
x_fvar(k) = fvar<fvar<T> >(fvar<T>(x(k), j == k), fvar<T>(i == k, 0));
5664
}
5765
fvar<fvar<T> > fx_fvar = f(x_fvar);
@@ -61,10 +69,38 @@ void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
6169
if (i == j) {
6270
grad(i) = fx_fvar.d_.val_;
6371
}
64-
H(i, j) = fx_fvar.d_.d_;
65-
H(j, i) = H(i, j);
72+
H.insert(j, i) = fx_fvar.d_.d_;
6673
}
6774
}
75+
H.makeCompressed();
76+
}
77+
78+
/**
79+
* Calculate the value, the gradient, and the Hessian,
80+
* of the specified function at the specified argument in
81+
* O(N^3) time and O(N^2) space. The advantage over the
82+
* mixed definition, which is faster for Hessians, is that this
83+
* version is itself differentiable.
84+
*
85+
* Overload for returning the Hessian as a symmetric dense matrix.
86+
*
87+
* @tparam T type of elements in the vector and matrix
88+
* @tparam F type of function
89+
* @param[in] f Function
90+
* @param[in] x Argument to function
91+
* @param[out] fx Function applied to argument
92+
* @param[out] grad gradient of function at argument
93+
* @param[out] H Hessian of function at argument, as a symmetric matrix
94+
*/
95+
template <typename T, typename F>
96+
void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
97+
Eigen::Matrix<T, Eigen::Dynamic, 1>& grad,
98+
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>& H) {
99+
Eigen::SparseMatrix<T> hess_sparse;
100+
hessian(f, x, fx, grad, hess_sparse);
101+
102+
H = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>(hess_sparse)
103+
.template selfadjointView<Eigen::Lower>();
68104
}
69105

70106
} // namespace math

stan/math/mix/functor/hessian.hpp

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#ifndef STAN_MATH_MIX_FUNCTOR_HESSIAN_HPP
22
#define STAN_MATH_MIX_FUNCTOR_HESSIAN_HPP
33

4-
#include <stan/math/fwd/core.hpp>
54
#include <stan/math/prim/fun/Eigen.hpp>
5+
#include <stan/math/fwd/core.hpp>
6+
#include <stan/math/fwd/fun/value_of_rec.hpp>
67
#include <stan/math/rev/core.hpp>
8+
#include <stan/math/rev/fun/value_of_rec.hpp>
79
#include <stdexcept>
810

911
namespace stan {
@@ -14,6 +16,9 @@ namespace math {
1416
* of the specified function at the specified argument in
1517
* O(N^2) time and O(N^2) space.
1618
*
19+
* Instead of returning the full symmetric Hessian, we return the
20+
* lower-triangular only as a column-major compressed sparse matrix.
21+
*
1722
* <p>The functor must implement
1823
*
1924
* <code>
@@ -36,20 +41,22 @@ namespace math {
3641
* @param[in] x Argument to function
3742
* @param[out] fx Function applied to argument
3843
* @param[out] grad gradient of function at argument
39-
* @param[out] H Hessian of function at argument
44+
* @param[out] H Hessian of function at argument, as a lower-triangular
45+
* compressed sparse matrix
4046
*/
4147
template <typename F>
42-
void hessian(const F& f, const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
43-
double& fx, Eigen::Matrix<double, Eigen::Dynamic, 1>& grad,
44-
Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& H) {
45-
H.resize(x.size(), x.size());
46-
grad.resize(x.size());
47-
48-
// need to compute fx even with size = 0
49-
if (x.size() == 0) {
50-
fx = f(x);
48+
void hessian(const F& f, const Eigen::VectorXd& x, double& fx,
49+
Eigen::VectorXd& grad, Eigen::SparseMatrix<double>& H) {
50+
int d = x.size();
51+
if (d == 0) {
52+
fx = value_of_rec(f(x));
5153
return;
5254
}
55+
56+
grad.resize(d);
57+
H.resize(d, d);
58+
H.reserve(Eigen::VectorXi::LinSpaced(d, 1, d).reverse());
59+
5360
for (int i = 0; i < x.size(); ++i) {
5461
// Run nested autodiff in this scope
5562
nested_rev_autodiff nested;
@@ -64,10 +71,34 @@ void hessian(const F& f, const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
6471
fx = fx_fvar.val_.val();
6572
}
6673
stan::math::grad(fx_fvar.d_.vi_);
67-
for (int j = 0; j < x.size(); ++j) {
68-
H(i, j) = x_fvar(j).val_.adj();
74+
for (int j = i; j < x.size(); ++j) {
75+
H.insert(j, i) = x_fvar(j).val_.adj();
6976
}
7077
}
78+
H.makeCompressed();
79+
}
80+
81+
/**
82+
* Calculate the value, the gradient, and the Hessian,
83+
* of the specified function at the specified argument in
84+
* O(N^2) time and O(N^2) space.
85+
*
86+
* Overload for returning the Hessian as a symmetric dense matrix.
87+
*
88+
* @tparam F Type of function
89+
* @param[in] f Function
90+
* @param[in] x Argument to function
91+
* @param[out] fx Function applied to argument
92+
* @param[out] grad gradient of function at argument
93+
* @param[out] H Hessian of function at argument, as a symmetric matrix
94+
*/
95+
template <typename F>
96+
void hessian(const F& f, const Eigen::VectorXd& x, double& fx,
97+
Eigen::VectorXd& grad, Eigen::MatrixXd& H) {
98+
Eigen::SparseMatrix<double> hess_sparse;
99+
hessian(f, x, fx, grad, hess_sparse);
100+
101+
H = Eigen::MatrixXd(hess_sparse).selfadjointView<Eigen::Lower>();
71102
}
72103

73104
} // namespace math

stan/math/mix/functor/hessian_times_vector.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ void hessian_times_vector(const F& f,
4242
Eigen::Matrix<T, Eigen::Dynamic, 1>& Hv) {
4343
using Eigen::Matrix;
4444
Matrix<T, Eigen::Dynamic, 1> grad;
45-
Matrix<T, Eigen::Dynamic, Eigen::Dynamic> H;
45+
Eigen::SparseMatrix<T> H;
4646
hessian(f, x, fx, grad, H);
47-
Hv = H * v;
47+
Hv = H.template selfadjointView<Eigen::Lower>() * v;
4848
}
4949

5050
} // namespace math

stan/math/rev/functor/finite_diff_hessian_auto.hpp

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <stan/math/rev/meta.hpp>
55
#include <stan/math/rev/core.hpp>
6+
#include <stan/math/rev/fun/value_of.hpp>
67
#include <stan/math/prim/fun/Eigen.hpp>
78
#include <stan/math/rev/functor.hpp>
89
#include <stan/math/prim/fun/finite_diff_stepsize.hpp>
@@ -17,10 +18,15 @@ namespace internal {
1718
* automatically setting the stepsize between the function evaluations
1819
* along a dimension.
1920
*
21+
* Instead of returning the full symmetric Hessian, we return the
22+
* lower-triangular only as a column-major compressed sparse matrix.
23+
*
2024
* <p>The functor must implement
2125
*
2226
* <code>
23-
* double operator()(const Eigen::VectorXd&)
27+
* var
28+
* operator()(const
29+
* Eigen::Matrix<var, Eigen::Dynamic, 1>&)
2430
* </code>
2531
*
2632
* <p>For details of the algorithm, see
@@ -37,18 +43,24 @@ namespace internal {
3743
* @param[in] x Argument to function
3844
* @param[out] fx Function applied to argument
3945
* @param[out] grad_fx Gradient of function at argument
40-
* @param[out] hess_fx Hessian of function at argument
46+
* @param[out] hess_fx Hessian of function at argument, as a lower-triangular
47+
* compressed sparse matrix
4148
*/
4249
template <typename F>
4350
void finite_diff_hessian_auto(const F& f, const Eigen::VectorXd& x, double& fx,
4451
Eigen::VectorXd& grad_fx,
45-
Eigen::MatrixXd& hess_fx) {
52+
Eigen::SparseMatrix<double>& hess_fx) {
4653
int d = x.size();
54+
if (d == 0) {
55+
fx = value_of(f(x));
56+
return;
57+
}
58+
59+
gradient(f, x, fx, grad_fx);
4760

4861
Eigen::VectorXd x_temp(x);
4962
hess_fx.resize(d, d);
50-
51-
gradient(f, x, fx, grad_fx);
63+
hess_fx.reserve(Eigen::VectorXi::LinSpaced(d, 1, d).reverse());
5264

5365
std::vector<Eigen::VectorXd> g_plus(d);
5466
std::vector<Eigen::VectorXd> g_minus(d);
@@ -74,12 +86,39 @@ void finite_diff_hessian_auto(const F& f, const Eigen::VectorXd& x, double& fx,
7486
// approximate the hessian as a finite difference of gradients
7587
for (int i = 0; i < d; ++i) {
7688
for (int j = i; j < d; ++j) {
77-
hess_fx(j, i) = (g_plus[j](i) - g_minus[j](i)) / (4 * epsilons[j])
78-
+ (g_plus[i](j) - g_minus[i](j)) / (4 * epsilons[i]);
79-
hess_fx(i, j) = hess_fx(j, i);
89+
hess_fx.insert(j, i)
90+
= (g_plus[j](i) - g_minus[j](i)) / (4 * epsilons[j])
91+
+ (g_plus[i](j) - g_minus[i](j)) / (4 * epsilons[i]);
8092
}
8193
}
94+
hess_fx.makeCompressed();
95+
}
96+
97+
/**
98+
* Calculate the value and the Hessian of the specified function at
99+
* the specified argument using first-order finite difference of gradients,
100+
* automatically setting the stepsize between the function evaluations
101+
* along a dimension.
102+
*
103+
* Overload for returning the Hessian as a symmetric dense matrix.
104+
*
105+
* @tparam F Type of function
106+
* @param[in] f Function
107+
* @param[in] x Argument to function
108+
* @param[out] fx Function applied to argument
109+
* @param[out] grad_fx Gradient of function at argument
110+
* @param[out] hess_fx Hessian of function at argument, as a symmetric matrix
111+
*/
112+
template <typename F>
113+
void finite_diff_hessian_auto(const F& f, const Eigen::VectorXd& x, double& fx,
114+
Eigen::VectorXd& grad_fx,
115+
Eigen::MatrixXd& hess_fx) {
116+
Eigen::SparseMatrix<double> hess_sparse;
117+
finite_diff_hessian_auto(f, x, fx, grad_fx, hess_sparse);
118+
119+
hess_fx = Eigen::MatrixXd(hess_sparse).selfadjointView<Eigen::Lower>();
82120
}
121+
83122
} // namespace internal
84123
} // namespace math
85124
} // namespace stan

test/unit/math/rev/functor/finite_diff_hessian_auto_test.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ struct exp_full {
5656
struct one_arg {
5757
template <typename T>
5858
inline T operator()(const Matrix<T, Dynamic, 1>& x) const {
59+
using stan::math::pow;
5960
return pow(x(0), 3);
6061
}
6162
};

0 commit comments

Comments
 (0)