stan-dev
diff --git a/‎make/compiler_flags
Lines changed: 15 additions & 4 deletions b/‎make/compiler_flags
Lines changed: 15 additions & 4 deletions
diff --git a/‎make/libraries
Lines changed: 5 additions & 4 deletions b/‎make/libraries
Lines changed: 5 additions & 4 deletions
diff --git a/‎stan/math/fwd/functor/hessian.hpp
Lines changed: 49 additions & 13 deletions b/‎stan/math/fwd/functor/hessian.hpp
Lines changed: 49 additions & 13 deletions
diff --git a/‎stan/math/mix/functor/hessian.hpp
Lines changed: 44 additions & 13 deletions b/‎stan/math/mix/functor/hessian.hpp
Lines changed: 44 additions & 13 deletions
diff --git a/‎stan/math/mix/functor/hessian_times_vector.hpp
Lines changed: 2 additions & 2 deletions b/‎stan/math/mix/functor/hessian_times_vector.hpp
Lines changed: 2 additions & 2 deletions
diff --git a/‎stan/math/prim/fun/inv_sqrt.hpp
Lines changed: 6 additions & 0 deletions b/‎stan/math/prim/fun/inv_sqrt.hpp
Lines changed: 6 additions & 0 deletions
@@ -17,16 +17,18 @@ endif
 
 ## Set OS specific library filename extensions
 ifeq ($(OS),Windows_NT)
-  WINARM64 := $(shell echo | $(CXX) -E -dM -  | findstr __aarch64__)
   LIBRARY_SUFFIX ?= .dll
+  STR_SEARCH ?= findstr
 endif
 
 ifeq ($(OS),Darwin)
   LIBRARY_SUFFIX ?= .dylib
+  STR_SEARCH ?= grep
 endif
 
 ifeq ($(OS),Linux)
   LIBRARY_SUFFIX ?= .so
+  STR_SEARCH ?= grep
 endif
 
 ## Set default compiler
@@ -42,6 +44,11 @@ ifeq (default,$(origin CXX))
   endif
 endif
 
+ARM64_CHECK := $(shell echo | $(CXX) -E -dM -  | $(STR_SEARCH) __aarch64__)
+ifneq ($(ARM64_CHECK),)
+  ARM64 = true
+endif
+
 # Detect compiler type
 # - CXX_TYPE: {gcc, clang, mingw32-gcc, other}
 # - CXX_MAJOR: major version of CXX
@@ -165,7 +172,7 @@ ifeq ($(OS),Windows_NT)
 
   make/ucrt:
     pound := \#
-    UCRT_STRING := $(shell echo '$(pound)include <windows.h>' | $(CXX) -E -dM -  | findstr _UCRT)
+    UCRT_STRING := $(shell echo '$(pound)include <windows.h>' | $(CXX) -E -dM -  | $(STR_SEARCH) _UCRT)
     ifneq (,$(UCRT_STRING))
       IS_UCRT ?= true
     else
@@ -212,6 +219,10 @@ endif
 ## makes reentrant version lgamma_r available from cmath
 CXXFLAGS_OS += -D_REENTRANT
 
+ifeq ($(ARM64), true)
+  CXXFLAGS_OS += -ffp-contract=off
+endif
+
 ## silence warnings occuring due to the TBB and Eigen libraries
 CXXFLAGS_WARNINGS += -Wno-ignored-attributes
 
@@ -276,7 +287,7 @@ endif
 LDFLAGS_TBB ?= -Wl,-L,"$(TBB_LIB)" -Wl,--disable-new-dtags
 
 # Windows LLVM/Clang does not support -rpath, but is not needed on Windows anyway
-ifeq ($(WINARM64),)
+ifneq ($(OS), Windows_NT)
   LDFLAGS_TBB += -Wl,-rpath,"$(TBB_LIB)"
 endif
 
@@ -300,7 +311,7 @@ CXXFLAGS_TBB ?= -I $(TBB)/include
 LDFLAGS_TBB ?= -Wl,-L,"$(TBB_BIN_ABSOLUTE_PATH)" $(LDFLAGS_FLTO_FLTO) $(LDFLAGS_OPTIM_TBB)
 
 # Windows LLVM/Clang does not support -rpath, but is not needed on Windows anyway
-ifeq ($(WINARM64),)
+ifneq ($(OS), Windows_NT)
   LDFLAGS_TBB += -Wl,-rpath,"$(TBB_BIN_ABSOLUTE_PATH)"
 endif
 LDLIBS_TBB ?= -ltbb
 
@@ -141,10 +141,11 @@ ifeq (Windows_NT, $(OS))
     TBB_CXXFLAGS += -D_UCRT
   endif
   # TBB does not have assembly code for Windows ARM64, so we need to use GCC builtins
-  ifneq ($(WINARM64),)
-	TBB_CXXFLAGS += -DTBB_USE_GCC_BUILTINS
-	CXXFLAGS_TBB += -DTBB_USE_GCC_BUILTINS
-  endif
+	ifeq ($(ARM64),true)
+		TBB_CXXFLAGS += -DTBB_USE_GCC_BUILTINS
+		CXXFLAGS_TBB += -DTBB_USE_GCC_BUILTINS
+		WINARM64 = true
+	endif
 	SH_CHECK := $(shell command -v sh 2>/dev/null)
 	ifdef SH_CHECK
 		WINDOWS_HAS_SH ?= true
 
@@ -2,6 +2,7 @@
 #define STAN_MATH_FWD_FUNCTOR_HESSIAN_HPP
 
 #include <stan/math/fwd/core.hpp>
+#include <stan/math/fwd/fun/value_of.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
 
 namespace stan {
@@ -14,6 +15,9 @@ namespace math {
  * mixed definition, which is faster for Hessians, is that this
  * version is itself differentiable.
  *
+ * Instead of returning the full symmetric Hessian, this overload returns
+ * only its lower triangle, as a column-major compressed sparse matrix.
+ *
  * <p>The functor must implement
  *
  * <code>
@@ -35,23 +39,27 @@ namespace math {
  * @param[in] x Argument to function
  * @param[out] fx Function applied to argument
  * @param[out] grad gradient of function at argument
- * @param[out] H Hessian of function at argument
+ * @param[out] H Hessian of function at argument, as a lower-triangular
+ *                      compressed sparse matrix
  */
 template <typename T, typename F>
 void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
              Eigen::Matrix<T, Eigen::Dynamic, 1>& grad,
-             Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>& H) {
-  H.resize(x.size(), x.size());
-  grad.resize(x.size());
-  // size 0 separate because nothing to loop over in main body
-  if (x.size() == 0) {
-    fx = f(x);
+             Eigen::SparseMatrix<T>& H) {
+  int d = x.size();
+  // Size the outputs up front so they are valid even for empty x
+  H.resize(d, d);
+  grad.resize(d);
+  if (d == 0) {
+    fx = value_of(f(x));
     return;
   }
-  Eigen::Matrix<fvar<fvar<T> >, Eigen::Dynamic, 1> x_fvar(x.size());
-  for (int i = 0; i < x.size(); ++i) {
-    for (int j = i; j < x.size(); ++j) {
-      for (int k = 0; k < x.size(); ++k) {
+  H.reserve(Eigen::VectorXi::LinSpaced(d, 1, d).reverse());
+
+  Eigen::Matrix<fvar<fvar<T> >, Eigen::Dynamic, 1> x_fvar(d);
+  for (int i = 0; i < d; ++i) {
+    for (int j = i; j < d; ++j) {
+      for (int k = 0; k < d; ++k) {
         x_fvar(k) = fvar<fvar<T> >(fvar<T>(x(k), j == k), fvar<T>(i == k, 0));
       }
       fvar<fvar<T> > fx_fvar = f(x_fvar);
@@ -61,10 +69,38 @@ void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
       if (i == j) {
         grad(i) = fx_fvar.d_.val_;
       }
-      H(i, j) = fx_fvar.d_.d_;
-      H(j, i) = H(i, j);
+      H.insert(j, i) = fx_fvar.d_.d_;
     }
   }
+  H.makeCompressed();
+}
+
+/**
+ * Calculate the value, the gradient, and the Hessian,
+ * of the specified function at the specified argument in
+ * O(N^3) time and O(N^2) space.  The advantage over the
+ * mixed definition, which is faster for Hessians, is that this
+ * version is itself differentiable.
+ *
+ * Overload for returning the Hessian as a symmetric dense matrix.
+ *
+ * @tparam T type of elements in the vector and matrix
+ * @tparam F type of function
+ * @param[in] f Function
+ * @param[in] x Argument to function
+ * @param[out] fx Function applied to argument
+ * @param[out] grad gradient of function at argument
+ * @param[out] H Hessian of function at argument, as a symmetric matrix
+ */
+template <typename T, typename F>
+void hessian(const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x, T& fx,
+             Eigen::Matrix<T, Eigen::Dynamic, 1>& grad,
+             Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>& H) {
+  using dense_t = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
+  Eigen::SparseMatrix<T> lower_tri;
+  hessian(f, x, fx, grad, lower_tri);
+  // Densify the lower triangle, then mirror it into the full matrix
+  H = dense_t(lower_tri).template selfadjointView<Eigen::Lower>();
 }
 
 }  // namespace math
 
@@ -1,9 +1,11 @@
 #ifndef STAN_MATH_MIX_FUNCTOR_HESSIAN_HPP
 #define STAN_MATH_MIX_FUNCTOR_HESSIAN_HPP
 
-#include <stan/math/fwd/core.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
+#include <stan/math/fwd/core.hpp>
+#include <stan/math/fwd/fun/value_of_rec.hpp>
 #include <stan/math/rev/core.hpp>
+#include <stan/math/rev/fun/value_of_rec.hpp>
 #include <stdexcept>
 
 namespace stan {
@@ -14,6 +16,9 @@ namespace math {
  * of the specified function at the specified argument in
  * O(N^2) time and O(N^2) space.
  *
+ * Instead of returning the full symmetric Hessian, this overload returns
+ * only its lower triangle, as a column-major compressed sparse matrix.
+ *
  * <p>The functor must implement
  *
  * <code>
@@ -36,20 +41,22 @@ namespace math {
  * @param[in] x Argument to function
  * @param[out] fx Function applied to argument
  * @param[out] grad gradient of function at argument
- * @param[out] H Hessian of function at argument
+ * @param[out] H Hessian of function at argument, as a lower-triangular
+ *                      compressed sparse matrix
  */
 template <typename F>
-void hessian(const F& f, const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
-             double& fx, Eigen::Matrix<double, Eigen::Dynamic, 1>& grad,
-             Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& H) {
-  H.resize(x.size(), x.size());
-  grad.resize(x.size());
-
-  // need to compute fx even with size = 0
-  if (x.size() == 0) {
-    fx = f(x);
+void hessian(const F& f, const Eigen::VectorXd& x, double& fx,
+             Eigen::VectorXd& grad, Eigen::SparseMatrix<double>& H) {
+  int d = x.size();
+  // Size the outputs up front so they are valid even for empty x
+  grad.resize(d);
+  H.resize(d, d);
+  if (d == 0) {
+    fx = value_of_rec(f(x));
     return;
   }
+  H.reserve(Eigen::VectorXi::LinSpaced(d, 1, d).reverse());
+
   for (int i = 0; i < x.size(); ++i) {
     // Run nested autodiff in this scope
     nested_rev_autodiff nested;
@@ -64,10 +71,34 @@ void hessian(const F& f, const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
       fx = fx_fvar.val_.val();
     }
     stan::math::grad(fx_fvar.d_.vi_);
-    for (int j = 0; j < x.size(); ++j) {
-      H(i, j) = x_fvar(j).val_.adj();
+    for (int j = i; j < x.size(); ++j) {
+      H.insert(j, i) = x_fvar(j).val_.adj();
     }
   }
+  H.makeCompressed();
+}
+
+/**
+ * Calculate the value, the gradient, and the Hessian,
+ * of the specified function at the specified argument in
+ * O(N^2) time and O(N^2) space.
+ *
+ * Overload for returning the Hessian as a symmetric dense matrix.
+ *
+ * @tparam F Type of function
+ * @param[in] f Function
+ * @param[in] x Argument to function
+ * @param[out] fx Function applied to argument
+ * @param[out] grad gradient of function at argument
+ * @param[out] H Hessian of function at argument, as a symmetric matrix
+ */
+template <typename F>
+void hessian(const F& f, const Eigen::VectorXd& x, double& fx,
+             Eigen::VectorXd& grad, Eigen::MatrixXd& H) {
+  Eigen::SparseMatrix<double> lower_tri;
+  hessian(f, x, fx, grad, lower_tri);
+  // Densify the lower triangle, then mirror it into the full matrix
+  H = Eigen::MatrixXd(lower_tri).selfadjointView<Eigen::Lower>();
 }
 
 }  // namespace math
 
@@ -42,9 +42,9 @@ void hessian_times_vector(const F& f,
                           Eigen::Matrix<T, Eigen::Dynamic, 1>& Hv) {
   using Eigen::Matrix;
   Matrix<T, Eigen::Dynamic, 1> grad;
-  Matrix<T, Eigen::Dynamic, Eigen::Dynamic> H;
+  Eigen::SparseMatrix<T> H;
   hessian(f, x, fx, grad, H);
-  Hv = H * v;
+  Hv = H.template selfadjointView<Eigen::Lower>() * v;
 }
 
 }  // namespace math
 
@@ -60,8 +60,14 @@ inline auto inv_sqrt(const Container& x) {
 template <typename Container, require_not_var_matrix_t<Container>* = nullptr,
           require_container_st<std::is_arithmetic, Container>* = nullptr>
 inline auto inv_sqrt(const Container& x) {
+// Eigen 3.4.0's vectorised rsqrt has precision issues on ARM64, so use the
+// scalar functor there. Fixed on master; remove this on the next release.
+#ifdef __aarch64__
+  return apply_scalar_unary<inv_sqrt_fun, Container>::apply(x);
+#else
   return apply_vector_unary<Container>::apply(
       x, [](const auto& v) { return v.array().rsqrt(); });
+#endif
 }
 
 }  // namespace math