insightsengineering · shajoezhu · Mar 11, 2025 · Mar 6, 2025 · Mar 10, 2025 · Mar 11, 2025
diff --git a/NEWS.md b/NEWS.md
@@ -5,6 +5,7 @@
 * Added `geom_sd` and `geom_mean_sd` to `s_summary()` default available statistics.
 * Refactored `afun_riskdiff()`, `count_occurrences()`, `count_occurrences_by_grade()`, `count_patients_with_event()`, `count_patients_with_flags()`, `count_values()`, `estimate_incidence_rate()`, `h_tab_one_biomarker()`, `summarize_change()`, `summarize_colvars()`, `summarize_patients_exposure_in_cols()`, `survival_time()`, `tabulate_rsp_subgroups()`, `tabulate_survival_subgroups()`, `tabulate_rsp_biomarkers()`, and `tabulate_survival_biomarkers()` to align with new analysis function style.
 * Converted `as_factor_keep_attributes()` to an exported function.
+* Added `denom` parameter to `estimate_proportion()`.
 
 ### Bug Fixes
 * Fixed bug in `a_count_patients_with_flags()` preventing select custom label and indentation specification formats from being applied.

diff --git a/R/estimate_proportion.R b/R/estimate_proportion.R
@@ -74,10 +74,14 @@ s_proportion <- function(df,
                          max_iterations = 50,
                          variables = list(strata = NULL),
                          long = FALSE,
+                         denom = c("n", "N_col", "N_row"),
                          ...) {
   method <- match.arg(method)
   checkmate::assert_flag(long)
   assert_proportion_value(conf_level)
+  args_list <- list(...)
+  .N_row <- args_list[[".N_row"]] # nolint
+  .N_col <- args_list[[".N_col"]] # nolint
 
   if (!is.null(variables$strata)) {
     # Checks for strata
@@ -101,23 +105,38 @@ s_proportion <- function(df,
   } else {
     rsp <- as.logical(df[[.var]])
   }
-  n <- sum(rsp)
-  p_hat <- mean(rsp)
+
+  # Stop for stratified analysis
+  if (method %in% c("strat_wilson", "strat_wilsonc") && denom[1] != "n") {
+    stop(
+      "Stratified methods only support 'n' as the denominator (denom). ",
+      "Consider adding negative responders directly to the dataset."
+    )
+  }
+
+  denom <- match.arg(denom) %>%
+    switch(
+      n = length(rsp),
+      N_row = .N_row,
+      N_col = .N_col
+    )
+  n_rsp <- sum(rsp)
+  p_hat <- ifelse(denom > 0, n_rsp / denom, 0)
 
   prop_ci <- switch(method,
-    "clopper-pearson" = prop_clopper_pearson(rsp, conf_level),
-    "wilson" = prop_wilson(rsp, conf_level),
-    "wilsonc" = prop_wilson(rsp, conf_level, correct = TRUE),
+    "clopper-pearson" = prop_clopper_pearson(rsp, n = denom, conf_level),
+    "wilson" = prop_wilson(rsp, n = denom, conf_level),
+    "wilsonc" = prop_wilson(rsp, n = denom, conf_level, correct = TRUE),
     "strat_wilson" = prop_strat_wilson(rsp, strata, weights, conf_level, max_iterations, correct = FALSE)$conf_int,
     "strat_wilsonc" = prop_strat_wilson(rsp, strata, weights, conf_level, max_iterations, correct = TRUE)$conf_int,
-    "wald" = prop_wald(rsp, conf_level),
-    "waldcc" = prop_wald(rsp, conf_level, correct = TRUE),
-    "agresti-coull" = prop_agresti_coull(rsp, conf_level),
-    "jeffreys" = prop_jeffreys(rsp, conf_level)
+    "wald" = prop_wald(rsp, n = denom, conf_level),
+    "waldcc" = prop_wald(rsp, n = denom, conf_level, correct = TRUE),
+    "agresti-coull" = prop_agresti_coull(rsp, n = denom, conf_level),
+    "jeffreys" = prop_jeffreys(rsp, n = denom, conf_level)
   )
 
   list(
-    "n_prop" = formatters::with_label(c(n, p_hat), "Responders"),
+    "n_prop" = formatters::with_label(c(n_rsp, p_hat), "Responders"),
     "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = d_proportion(conf_level, method, long = long))
   )
 }
@@ -290,10 +309,10 @@ NULL
 #' prop_wilson(rsp, conf_level = 0.9)
 #'
 #' @export
-prop_wilson <- function(rsp, conf_level, correct = FALSE) {
+prop_wilson <- function(rsp, n = length(rsp), conf_level, correct = FALSE) {
   y <- stats::prop.test(
     sum(rsp),
-    length(rsp),
+    n,
     correct = correct,
     conf.level = conf_level
   )
@@ -424,15 +443,17 @@ prop_strat_wilson <- function(rsp,
 #' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
 #'   Also referred to as the `exact` method.
 #'
+#' @param n (`count`)\cr number of participants (if `denom = "N_col"`) or the number of responders
+#'   (if `denom = "n"`, the default).
+#'
 #' @examples
 #' prop_clopper_pearson(rsp, conf_level = .95)
 #'
 #' @export
-prop_clopper_pearson <- function(rsp,
-                                 conf_level) {
+prop_clopper_pearson <- function(rsp, n = length(rsp), conf_level) {
   y <- stats::binom.test(
     x = sum(rsp),
-    n = length(rsp),
+    n = n,
     conf.level = conf_level
   )
   as.numeric(y$conf.int)
@@ -448,9 +469,8 @@ prop_clopper_pearson <- function(rsp,
 #' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
 #'
 #' @export
-prop_wald <- function(rsp, conf_level, correct = FALSE) {
-  n <- length(rsp)
-  p_hat <- mean(rsp)
+prop_wald <- function(rsp, n = length(rsp), conf_level, correct = FALSE) {
+  p_hat <- ifelse(n > 0, sum(rsp) / n, 0)
   z <- stats::qnorm((1 + conf_level) / 2)
   q_hat <- 1 - p_hat
   correct <- if (correct) 1 / (2 * n) else 0
@@ -469,8 +489,7 @@ prop_wald <- function(rsp, conf_level, correct = FALSE) {
 #' prop_agresti_coull(rsp, conf_level = 0.95)
 #'
 #' @export
-prop_agresti_coull <- function(rsp, conf_level) {
-  n <- length(rsp)
+prop_agresti_coull <- function(rsp, n = length(rsp), conf_level) {
   x_sum <- sum(rsp)
   z <- stats::qnorm((1 + conf_level) / 2)
 
@@ -495,9 +514,7 @@ prop_agresti_coull <- function(rsp, conf_level) {
 #' prop_jeffreys(rsp, conf_level = 0.95)
 #'
 #' @export
-prop_jeffreys <- function(rsp,
-                          conf_level) {
-  n <- length(rsp)
+prop_jeffreys <- function(rsp, n = length(rsp), conf_level) {
   x_sum <- sum(rsp)
 
   alpha <- 1 - conf_level

diff --git a/man/estimate_proportion.Rd b/man/estimate_proportion.Rd
diff --git a/man/h_proportions.Rd b/man/h_proportions.Rd
diff --git a/tests/testthat/_snaps/estimate_proportion.md b/tests/testthat/_snaps/estimate_proportion.md
@@ -217,3 +217,24 @@
       [1,] "108.00 (54.00%)"   
       [2,] "(46.7669, 60.5951)"
 
+# `estimate_proportion` works with different denominators
+
+    Code
+      res
+    Output
+                                         all obs   
+                                         (N=200)   
+      —————————————————————————————————————————————
+      Responders                       108 (54.0%) 
+      95% CI (Wald, with correction)   (46.8, 61.2)
+
+---
+
+    Code
+      res
+    Output
+                                         all obs   
+      —————————————————————————————————————————————
+      Responders                       108 (54.0%) 
+      95% CI (Wald, with correction)   (46.8, 61.2)
+
diff --git a/tests/testthat/test-estimate_proportion.R b/tests/testthat/test-estimate_proportion.R
@@ -339,3 +339,46 @@ testthat::test_that(
     testthat::expect_snapshot(res)
   }
 )
+testthat::test_that("`estimate_proportion` works with different denominators", {
+  set.seed(1)
+
+  # Data loading and processing
+  anl <- tern_ex_adrs %>%
+    dplyr::filter(PARAMCD == "BESRSPI") %>%
+    dplyr::mutate(DTHFL = DTHFL == "Y") # Death flag yes
+
+  # Changing other variables (weights and max_nt)
+  n_ws <- length(unique(anl$SEX)) * length(unique(anl$STRATA1))
+  expect_error(
+    {
+      result <- basic_table() %>%
+        estimate_proportion(
+          vars = "DTHFL",
+          method = "strat_wilson",
+          variables = list(strata = c("SEX", "STRATA1")),
+          weights = rep(1 / n_ws, n_ws),
+          denom = "N_cols"
+        ) %>%
+        build_table(anl)
+    },
+    "Stratified methods only support"
+  )
+
+  result <- basic_table() %>%
+    estimate_proportion(
+      vars = "DTHFL",
+      denom = "N_col"
+    ) %>%
+    build_table(anl, col_counts = c(200))
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+
+  result <- basic_table() %>%
+    estimate_proportion(
+      vars = "DTHFL",
+      denom = "n"
+    ) %>%
+    build_table(anl)
+  res <- testthat::expect_silent(result)
+  testthat::expect_snapshot(res)
+})