Skip to content

Commit 6ee370a

Browse files
committed
Merge branch 'develop' into rtools44-gha
2 parents 32a2457 + ee65daa commit 6ee370a

File tree

12 files changed

+184
-52
lines changed

12 files changed

+184
-52
lines changed

make/compiler_flags

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,18 @@ endif
1717

1818
## Set OS specific library filename extensions
1919
ifeq ($(OS),Windows_NT)
20-
WINARM64 := $(shell echo | $(CXX) -E -dM - | findstr __aarch64__)
2120
LIBRARY_SUFFIX ?= .dll
21+
STR_SEARCH ?= findstr
2222
endif
2323

2424
ifeq ($(OS),Darwin)
2525
LIBRARY_SUFFIX ?= .dylib
26+
STR_SEARCH ?= grep
2627
endif
2728

2829
ifeq ($(OS),Linux)
2930
LIBRARY_SUFFIX ?= .so
31+
STR_SEARCH ?= grep
3032
endif
3133

3234
## Set default compiler
@@ -42,6 +44,11 @@ ifeq (default,$(origin CXX))
4244
endif
4345
endif
4446

47+
ARM64_CHECK := $(shell echo | $(CXX) -E -dM - | $(STR_SEARCH) __aarch64__)
48+
ifneq ($(ARM64_CHECK),)
49+
ARM64 = true
50+
endif
51+
4552
# Detect compiler type
4653
# - CXX_TYPE: {gcc, clang, mingw32-gcc, other}
4754
# - CXX_MAJOR: major version of CXX
@@ -165,7 +172,7 @@ ifeq ($(OS),Windows_NT)
165172

166173
make/ucrt:
167174
pound := \#
168-
UCRT_STRING := $(shell echo '$(pound)include <windows.h>' | $(CXX) -E -dM - | findstr _UCRT)
175+
UCRT_STRING := $(shell echo '$(pound)include <windows.h>' | $(CXX) -E -dM - | $(STR_SEARCH) _UCRT)
169176
ifneq (,$(UCRT_STRING))
170177
IS_UCRT ?= true
171178
else
@@ -212,6 +219,10 @@ endif
212219
## makes reentrant version lgamma_r available from cmath
213220
CXXFLAGS_OS += -D_REENTRANT
214221

222+
ifeq ($(ARM64), true)
223+
CXXFLAGS_OS += -ffp-contract=off
224+
endif
225+
215226
## silence warnings occurring due to the TBB and Eigen libraries
216227
CXXFLAGS_WARNINGS += -Wno-ignored-attributes
217228

@@ -276,7 +287,7 @@ endif
276287
LDFLAGS_TBB ?= -Wl,-L,"$(TBB_LIB)" -Wl,--disable-new-dtags
277288

278289
# Windows LLVM/Clang does not support -rpath, but it is not needed on Windows anyway
279-
ifeq ($(WINARM64),)
290+
ifneq ($(OS), Windows_NT)
280291
LDFLAGS_TBB += -Wl,-rpath,"$(TBB_LIB)"
281292
endif
282293

@@ -300,7 +311,7 @@ CXXFLAGS_TBB ?= -I $(TBB)/include
300311
LDFLAGS_TBB ?= -Wl,-L,"$(TBB_BIN_ABSOLUTE_PATH)" $(LDFLAGS_FLTO_FLTO) $(LDFLAGS_OPTIM_TBB)
301312

302313
# Windows LLVM/Clang does not support -rpath, but it is not needed on Windows anyway
303-
ifeq ($(WINARM64),)
314+
ifneq ($(OS), Windows_NT)
304315
LDFLAGS_TBB += -Wl,-rpath,"$(TBB_BIN_ABSOLUTE_PATH)"
305316
endif
306317
LDLIBS_TBB ?= -ltbb

make/libraries

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,11 @@ ifeq (Windows_NT, $(OS))
141141
TBB_CXXFLAGS += -D_UCRT
142142
endif
143143
# TBB does not have assembly code for Windows ARM64, so we need to use GCC builtins
144-
ifneq ($(WINARM64),)
145-
TBB_CXXFLAGS += -DTBB_USE_GCC_BUILTINS
146-
CXXFLAGS_TBB += -DTBB_USE_GCC_BUILTINS
147-
endif
144+
ifeq ($(ARM64),true)
145+
TBB_CXXFLAGS += -DTBB_USE_GCC_BUILTINS
146+
CXXFLAGS_TBB += -DTBB_USE_GCC_BUILTINS
147+
WINARM64 = true
148+
endif
148149
SH_CHECK := $(shell command -v sh 2>/dev/null)
149150
ifdef SH_CHECK
150151
WINDOWS_HAS_SH ?= true

stan/math/fwd/functor/hessian.hpp

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define STAN_MATH_FWD_FUNCTOR_HESSIAN_HPP
33

44
#include <stan/math/fwd/core.hpp>
5+
#include <stan/math/fwd/fun/value_of.hpp>
56
#include <stan/math/prim/fun/Eigen.hpp>
67

78
namespace stan {
@@ -14,6 +15,9 @@ namespace math {
1415
* mixed definition, which is faster for Hessians, is that this
1516
* version is itself differentiable.
1617
*
18+
* Instead of returning the full symmetric Hessian, we return the
19+
* lower-triangular part only, as a column-major compressed sparse matrix.
20+
*
1721
* <p>The functor must implement
1822
*
1923
* <code>
@@ -35,23 +39,27 @@ namespace math {
3539
* @param[in] x Argument to function
3640
* @param[out] fx Function applied to argument
3741
* @param[out] grad gradient of function at argument
38-
* @param[out] H Hessian of function at argument
42+
* @param[out] H Hessian of function at argument, as a lower-triangular
43+
* compressed sparse matrix
3944
*/
4045
template <typename T, typename F>
4146
void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
4247
Eigen::Matrix<T, Eigen::Dynamic, 1>& grad,
43-
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>& H) {
44-
H.resize(x.size(), x.size());
45-
grad.resize(x.size());
46-
// size 0 separate because nothing to loop over in main body
47-
if (x.size() == 0) {
48-
fx = f(x);
48+
Eigen::SparseMatrix<T>& H) {
49+
int d = x.size();
50+
if (d == 0) {
51+
fx = value_of(f(x));
4952
return;
5053
}
51-
Eigen::Matrix<fvar<fvar<T> >, Eigen::Dynamic, 1> x_fvar(x.size());
52-
for (int i = 0; i < x.size(); ++i) {
53-
for (int j = i; j < x.size(); ++j) {
54-
for (int k = 0; k < x.size(); ++k) {
54+
55+
H.resize(d, d);
56+
H.reserve(Eigen::VectorXi::LinSpaced(d, 1, d).reverse());
57+
grad.resize(d);
58+
59+
Eigen::Matrix<fvar<fvar<T> >, Eigen::Dynamic, 1> x_fvar(d);
60+
for (int i = 0; i < d; ++i) {
61+
for (int j = i; j < d; ++j) {
62+
for (int k = 0; k < d; ++k) {
5563
x_fvar(k) = fvar<fvar<T> >(fvar<T>(x(k), j == k), fvar<T>(i == k, 0));
5664
}
5765
fvar<fvar<T> > fx_fvar = f(x_fvar);
@@ -61,10 +69,38 @@ void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
6169
if (i == j) {
6270
grad(i) = fx_fvar.d_.val_;
6371
}
64-
H(i, j) = fx_fvar.d_.d_;
65-
H(j, i) = H(i, j);
72+
H.insert(j, i) = fx_fvar.d_.d_;
6673
}
6774
}
75+
H.makeCompressed();
76+
}
77+
78+
/**
79+
* Calculate the value, the gradient, and the Hessian,
80+
* of the specified function at the specified argument in
81+
* O(N^3) time and O(N^2) space. The advantage over the
82+
* mixed definition, which is faster for Hessians, is that this
83+
* version is itself differentiable.
84+
*
85+
* Overload for returning the Hessian as a symmetric dense matrix.
86+
*
87+
* @tparam T type of elements in the vector and matrix
88+
* @tparam F type of function
89+
* @param[in] f Function
90+
* @param[in] x Argument to function
91+
* @param[out] fx Function applied to argument
92+
* @param[out] grad gradient of function at argument
93+
* @param[out] H Hessian of function at argument, as a symmetric matrix
94+
*/
95+
template <typename T, typename F>
96+
void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
97+
Eigen::Matrix<T, Eigen::Dynamic, 1>& grad,
98+
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>& H) {
99+
Eigen::SparseMatrix<T> hess_sparse;
100+
hessian(f, x, fx, grad, hess_sparse);
101+
102+
H = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>(hess_sparse)
103+
.template selfadjointView<Eigen::Lower>();
68104
}
69105

70106
} // namespace math

stan/math/mix/functor/hessian.hpp

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#ifndef STAN_MATH_MIX_FUNCTOR_HESSIAN_HPP
22
#define STAN_MATH_MIX_FUNCTOR_HESSIAN_HPP
33

4-
#include <stan/math/fwd/core.hpp>
54
#include <stan/math/prim/fun/Eigen.hpp>
5+
#include <stan/math/fwd/core.hpp>
6+
#include <stan/math/fwd/fun/value_of_rec.hpp>
67
#include <stan/math/rev/core.hpp>
8+
#include <stan/math/rev/fun/value_of_rec.hpp>
79
#include <stdexcept>
810

911
namespace stan {
@@ -14,6 +16,9 @@ namespace math {
1416
* of the specified function at the specified argument in
1517
* O(N^2) time and O(N^2) space.
1618
*
19+
* Instead of returning the full symmetric Hessian, we return the
20+
* lower-triangular part only, as a column-major compressed sparse matrix.
21+
*
1722
* <p>The functor must implement
1823
*
1924
* <code>
@@ -36,20 +41,22 @@ namespace math {
3641
* @param[in] x Argument to function
3742
* @param[out] fx Function applied to argument
3843
* @param[out] grad gradient of function at argument
39-
* @param[out] H Hessian of function at argument
44+
* @param[out] H Hessian of function at argument, as a lower-triangular
45+
* compressed sparse matrix
4046
*/
4147
template <typename F>
42-
void hessian(const F& f, const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
43-
double& fx, Eigen::Matrix<double, Eigen::Dynamic, 1>& grad,
44-
Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& H) {
45-
H.resize(x.size(), x.size());
46-
grad.resize(x.size());
47-
48-
// need to compute fx even with size = 0
49-
if (x.size() == 0) {
50-
fx = f(x);
48+
void hessian(const F& f, const Eigen::VectorXd& x, double& fx,
49+
Eigen::VectorXd& grad, Eigen::SparseMatrix<double>& H) {
50+
int d = x.size();
51+
if (d == 0) {
52+
fx = value_of_rec(f(x));
5153
return;
5254
}
55+
56+
grad.resize(d);
57+
H.resize(d, d);
58+
H.reserve(Eigen::VectorXi::LinSpaced(d, 1, d).reverse());
59+
5360
for (int i = 0; i < x.size(); ++i) {
5461
// Run nested autodiff in this scope
5562
nested_rev_autodiff nested;
@@ -64,10 +71,34 @@ void hessian(const F& f, const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
6471
fx = fx_fvar.val_.val();
6572
}
6673
stan::math::grad(fx_fvar.d_.vi_);
67-
for (int j = 0; j < x.size(); ++j) {
68-
H(i, j) = x_fvar(j).val_.adj();
74+
for (int j = i; j < x.size(); ++j) {
75+
H.insert(j, i) = x_fvar(j).val_.adj();
6976
}
7077
}
78+
H.makeCompressed();
79+
}
80+
81+
/**
82+
* Calculate the value, the gradient, and the Hessian,
83+
* of the specified function at the specified argument in
84+
* O(N^2) time and O(N^2) space.
85+
*
86+
* Overload for returning the Hessian as a symmetric dense matrix.
87+
*
88+
* @tparam F Type of function
89+
* @param[in] f Function
90+
* @param[in] x Argument to function
91+
* @param[out] fx Function applied to argument
92+
* @param[out] grad gradient of function at argument
93+
* @param[out] H Hessian of function at argument, as a symmetric matrix
94+
*/
95+
template <typename F>
96+
void hessian(const F& f, const Eigen::VectorXd& x, double& fx,
97+
Eigen::VectorXd& grad, Eigen::MatrixXd& H) {
98+
Eigen::SparseMatrix<double> hess_sparse;
99+
hessian(f, x, fx, grad, hess_sparse);
100+
101+
H = Eigen::MatrixXd(hess_sparse).selfadjointView<Eigen::Lower>();
71102
}
72103

73104
} // namespace math

stan/math/mix/functor/hessian_times_vector.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ void hessian_times_vector(const F& f,
4242
Eigen::Matrix<T, Eigen::Dynamic, 1>& Hv) {
4343
using Eigen::Matrix;
4444
Matrix<T, Eigen::Dynamic, 1> grad;
45-
Matrix<T, Eigen::Dynamic, Eigen::Dynamic> H;
45+
Eigen::SparseMatrix<T> H;
4646
hessian(f, x, fx, grad, H);
47-
Hv = H * v;
47+
Hv = H.template selfadjointView<Eigen::Lower>() * v;
4848
}
4949

5050
} // namespace math

stan/math/prim/fun/inv_sqrt.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,14 @@ inline auto inv_sqrt(const Container& x) {
6060
template <typename Container, require_not_var_matrix_t<Container>* = nullptr,
6161
require_container_st<std::is_arithmetic, Container>* = nullptr>
6262
inline auto inv_sqrt(const Container& x) {
63+
// Eigen 3.4.0 has precision issues on ARM64 with vectorised rsqrt
64+
// Resolved in Eigen's current master branch; the workaround below can be removed on the next Eigen release
65+
#ifdef __aarch64__
66+
return apply_scalar_unary<inv_sqrt_fun, Container>::apply(x);
67+
#else
6368
return apply_vector_unary<Container>::apply(
6469
x, [](const auto& v) { return v.array().rsqrt(); });
70+
#endif
6571
}
6672

6773
} // namespace math

0 commit comments

Comments
 (0)