Merge pull request #3037 from stan-dev/robust-weibull-cdf

andrjohns · web-flow · commit 87d04dcab539 · 2024-03-27T00:07:39.000+02:00
Make Weibull cdf & lcdf more robust, handle y = 0 inputs
diff --git a/stan/math/prim/prob/weibull_cdf.hpp b/stan/math/prim/prob/weibull_cdf.hpp
@@ -43,7 +43,6 @@ return_type_t<T_y, T_shape, T_scale> weibull_cdf(const T_y& y,
   using T_y_ref = ref_type_if_not_constant_t<T_y>;
   using T_alpha_ref = ref_type_if_not_constant_t<T_shape>;
   using T_sigma_ref = ref_type_if_not_constant_t<T_scale>;
-  using std::pow;
   static constexpr const char* function = "weibull_cdf";
 
   T_y_ref y_ref = y;
@@ -63,34 +62,36 @@ return_type_t<T_y, T_shape, T_scale> weibull_cdf(const T_y& y,
   }
 
   auto ops_partials = make_partials_propagator(y_ref, alpha_ref, sigma_ref);
+  if (any(value_of_rec(y_val) == 0)) {
+    return ops_partials.build(0.0);
+  }
 
   constexpr bool any_derivs = !is_constant_all<T_y, T_shape, T_scale>::value;
-  const auto& pow_n = to_ref_if<any_derivs>(pow(y_val / sigma_val, alpha_val));
-  const auto& exp_n = to_ref_if<any_derivs>(exp(-pow_n));
-  const auto& cdf_n = to_ref_if<any_derivs>(1 - exp_n);
+  const auto& log_y = to_ref_if<any_derivs>(log(y_val));
+  const auto& log_sigma = to_ref_if<any_derivs>(log(sigma_val));
+  const auto& log_y_div_sigma = to_ref_if<any_derivs>(log_y - log_sigma);
+  const auto& log_pow_n = to_ref_if<any_derivs>(alpha_val * log_y_div_sigma);
+  const auto& pow_n = to_ref_if<any_derivs>(exp(log_pow_n));
+  const auto& log_cdf_n = to_ref_if<any_derivs>(log1m_exp(-pow_n));
 
-  T_partials_return cdf = prod(cdf_n);
+  T_partials_return log_cdf = sum(log_cdf_n);
 
   if (any_derivs) {
-    const auto& rep_deriv = to_ref_if<(!is_constant_all<T_y, T_scale>::value
-                                       && !is_constant_all<T_shape>::value)>(
-        exp_n * pow_n * cdf / cdf_n);
+    const auto& log_rep_deriv = to_ref(log_pow_n + log_cdf - log_cdf_n - pow_n);
     if (!is_constant_all<T_y, T_scale>::value) {
-      const auto& deriv_y_sigma = to_ref_if<(
-          !is_constant_all<T_y>::value && !is_constant_all<T_scale>::value)>(
-          rep_deriv * alpha_val);
+      const auto& log_deriv_y_sigma = to_ref(log_rep_deriv + log(alpha_val));
       if (!is_constant_all<T_y>::value) {
-        partials<0>(ops_partials) = deriv_y_sigma / y_val;
+        partials<0>(ops_partials) = exp(log_deriv_y_sigma - log_y);
       }
       if (!is_constant_all<T_scale>::value) {
-        partials<2>(ops_partials) = -deriv_y_sigma / sigma_val;
+        partials<2>(ops_partials) = -exp(log_deriv_y_sigma - log_sigma);
       }
     }
     if (!is_constant_all<T_shape>::value) {
-      partials<1>(ops_partials) = rep_deriv * log(y_val / sigma_val);
+      partials<1>(ops_partials) = exp(log_rep_deriv) * log_y_div_sigma;
     }
   }
-  return ops_partials.build(cdf);
+  return ops_partials.build(exp(log_cdf));
 }
 
 }  // namespace math
diff --git a/stan/math/prim/prob/weibull_lcdf.hpp b/stan/math/prim/prob/weibull_lcdf.hpp
@@ -39,11 +39,9 @@ template <typename T_y, typename T_shape, typename T_scale,
 return_type_t<T_y, T_shape, T_scale> weibull_lcdf(const T_y& y,
                                                   const T_shape& alpha,
                                                   const T_scale& sigma) {
-  using T_partials_return = partials_return_t<T_y, T_shape, T_scale>;
   using T_y_ref = ref_type_if_not_constant_t<T_y>;
   using T_alpha_ref = ref_type_if_not_constant_t<T_shape>;
   using T_sigma_ref = ref_type_if_not_constant_t<T_scale>;
-  using std::pow;
   static constexpr const char* function = "weibull_lcdf";
 
   T_y_ref y_ref = y;
@@ -63,34 +61,34 @@ return_type_t<T_y, T_shape, T_scale> weibull_lcdf(const T_y& y,
   }
 
   auto ops_partials = make_partials_propagator(y_ref, alpha_ref, sigma_ref);
+  if (any(value_of_rec(y_val) == 0)) {
+    return ops_partials.build(stan::math::NEGATIVE_INFTY);
+  }
 
   constexpr bool any_derivs = !is_constant_all<T_y, T_shape, T_scale>::value;
-  const auto& pow_n = to_ref_if<any_derivs>(pow(y_val / sigma_val, alpha_val));
-  const auto& exp_n = to_ref_if<any_derivs>(exp(-pow_n));
+  const auto& log_y = to_ref_if<any_derivs>(log(y_val));
+  const auto& log_sigma = to_ref_if<any_derivs>(log(sigma_val));
+  const auto& log_y_div_sigma = to_ref_if<any_derivs>(log_y - log_sigma);
+  const auto& log_pow_n = to_ref_if<any_derivs>(alpha_val * log_y_div_sigma);
+  const auto& pow_n = to_ref_if<any_derivs>(exp(log_pow_n));
 
-  // TODO(Andrew) Further simplify derivatives and log1m_exp below
-  T_partials_return cdf_log = sum(log1m(exp_n));
+  if (any_derivs) {
+    const auto& log_rep_deriv = to_ref(log_pow_n - log_diff_exp(pow_n, 0.0));
 
-  if (!is_constant_all<T_y, T_scale, T_shape>::value) {
-    const auto& rep_deriv = to_ref_if<(!is_constant_all<T_y, T_scale>::value
-                                       && !is_constant_all<T_shape>::value)>(
-        pow_n / (1.0 / exp_n - 1.0));
     if (!is_constant_all<T_y, T_scale>::value) {
-      const auto& deriv_y_sigma = to_ref_if<(
-          !is_constant_all<T_y>::value && !is_constant_all<T_scale>::value)>(
-          rep_deriv * alpha_val);
+      const auto& log_deriv_y_sigma = to_ref(log_rep_deriv + log(alpha_val));
       if (!is_constant_all<T_y>::value) {
-        partials<0>(ops_partials) = deriv_y_sigma / y_val;
+        partials<0>(ops_partials) = exp(log_deriv_y_sigma - log_y);
       }
       if (!is_constant_all<T_scale>::value) {
-        partials<2>(ops_partials) = -deriv_y_sigma / sigma_val;
+        partials<2>(ops_partials) = -exp(log_deriv_y_sigma - log_sigma);
       }
     }
     if (!is_constant_all<T_shape>::value) {
-      partials<1>(ops_partials) = rep_deriv * log(y_val / sigma_val);
+      partials<1>(ops_partials) = exp(log_rep_deriv) * log_y_div_sigma;
     }
   }
-  return ops_partials.build(cdf_log);
+  return ops_partials.build(sum(log1m_exp(-pow_n)));
 }
 
 }  // namespace math
diff --git a/test/unit/math/mix/prob/weibull_cdf_test.cpp b/test/unit/math/mix/prob/weibull_cdf_test.cpp
@@ -0,0 +1,37 @@
+#include <stan/math/mix.hpp>
+#include <test/unit/math/test_ad.hpp>
+
+TEST(mathMixScalFun, weibull_cdf) {
+  // Arguments are given on the log (i.e., unconstrained) scale and passed
+  // through lb_constrain / positive_constrain inside f, so that the
+  // finite-diffs don't result in invalid inputs.
+  auto f = [](const auto& y, const auto& alpha, const auto& sigma) {
+    using stan::math::lb_constrain;
+    using stan::math::positive_constrain;
+
+    return stan::math::weibull_cdf(lb_constrain(y, 0.0),
+                                   positive_constrain(alpha),
+                                   positive_constrain(sigma));
+  };
+
+  using stan::math::log;
+
+  Eigen::VectorXd y(3);
+  y << stan::math::NEGATIVE_INFTY, 1.2, 0.0;  // lb_constrain(y[0], 0.0) = 0.0
+
+  Eigen::VectorXd alpha(3);
+  alpha << 2.0, 3.0, 4.0;
+
+  Eigen::VectorXd sigma(3);
+  sigma << 5.0, 6.0, 7.0;
+  // All-vector call first, then one argument at a time replaced by a scalar.
+  stan::test::expect_ad(f, y, alpha, sigma);
+  stan::test::expect_ad(f, y[0], alpha, sigma);
+  stan::test::expect_ad(f, y, alpha[0], sigma);
+  stan::test::expect_ad(f, y, alpha, sigma[0]);
+  // Two scalar arguments combined with one vector argument.
+  stan::test::expect_ad(f, y[0], alpha[0], sigma);
+  stan::test::expect_ad(f, y[0], alpha, sigma[0]);
+  stan::test::expect_ad(f, y, alpha[0], sigma[0]);
+  // All-scalar call; y[0] is the NEGATIVE_INFTY entry assigned above.
+  stan::test::expect_ad(f, y[0], alpha[0], sigma[0]);
+}
diff --git a/test/unit/math/mix/prob/weibull_lcdf_test.cpp b/test/unit/math/mix/prob/weibull_lcdf_test.cpp
@@ -0,0 +1,37 @@
+#include <stan/math/mix.hpp>
+#include <test/unit/math/test_ad.hpp>
+
+TEST(mathMixScalFun, weibull_lcdf) {
+  // Arguments are given on the log (i.e., unconstrained) scale and passed
+  // through lb_constrain / positive_constrain inside f, so that the
+  // finite-diffs don't result in invalid inputs.
+  auto f = [](const auto& y, const auto& alpha, const auto& sigma) {
+    using stan::math::lb_constrain;
+    using stan::math::positive_constrain;
+
+    return stan::math::weibull_lcdf(lb_constrain(y, 0.0),
+                                    positive_constrain(alpha),
+                                    positive_constrain(sigma));
+  };
+
+  using stan::math::log;
+
+  Eigen::VectorXd y(3);
+  y << stan::math::NEGATIVE_INFTY, 1.2, 0.0;  // lb_constrain(y[0], 0.0) = 0.0
+
+  Eigen::VectorXd alpha(3);
+  alpha << 2.0, 3.0, 4.0;
+
+  Eigen::VectorXd sigma(3);
+  sigma << 5.0, 6.0, 7.0;
+  // All-vector call first, then one argument at a time replaced by a scalar.
+  stan::test::expect_ad(f, y, alpha, sigma);
+  stan::test::expect_ad(f, y[0], alpha, sigma);
+  stan::test::expect_ad(f, y, alpha[0], sigma);
+  stan::test::expect_ad(f, y, alpha, sigma[0]);
+  // Two scalar arguments combined with one vector argument.
+  stan::test::expect_ad(f, y[0], alpha[0], sigma);
+  stan::test::expect_ad(f, y[0], alpha, sigma[0]);
+  stan::test::expect_ad(f, y, alpha[0], sigma[0]);
+  // All-scalar call; y[0] is the NEGATIVE_INFTY entry assigned above.
+  stan::test::expect_ad(f, y[0], alpha[0], sigma[0]);
+}