diff --git a/NEWS.md b/NEWS.md index 1286af6b3e..b5c83455fa 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ * Added `geom_sd` and `geom_mean_sd` to `s_summary()` default available statistics. * Refactored `afun_riskdiff()`, `count_occurrences()`, `count_occurrences_by_grade()`, `count_patients_with_event()`, `count_patients_with_flags()`, `count_values()`, `estimate_incidence_rate()`, `h_tab_one_biomarker()`, `summarize_change()`, `summarize_colvars()`, `summarize_patients_exposure_in_cols()`, `survival_time()`, `tabulate_rsp_subgroups()`, `tabulate_survival_subgroups()`, `tabulate_rsp_biomarkers()`, and `tabulate_survival_biomarkers()` to align with new analysis function style. * Converted `as_factor_keep_attributes()` to an exported function. +* Added `denom` parameter to `estimate_proportion()`. ### Bug Fixes * Fixed bug in `a_count_patients_with_flags()` preventing select custom label and indentation specification formats from being applied. diff --git a/R/estimate_proportion.R b/R/estimate_proportion.R index dd9bffe221..63e14c8d66 100644 --- a/R/estimate_proportion.R +++ b/R/estimate_proportion.R @@ -74,10 +74,14 @@ s_proportion <- function(df, max_iterations = 50, variables = list(strata = NULL), long = FALSE, + denom = c("n", "N_col", "N_row"), ...) { method <- match.arg(method) checkmate::assert_flag(long) assert_proportion_value(conf_level) + args_list <- list(...) + .N_row <- args_list[[".N_row"]] # nolint + .N_col <- args_list[[".N_col"]] # nolint if (!is.null(variables$strata)) { # Checks for strata @@ -101,23 +105,38 @@ s_proportion <- function(df, } else { rsp <- as.logical(df[[.var]]) } - n <- sum(rsp) - p_hat <- mean(rsp) + + # Stop for stratified analysis + if (method %in% c("strat_wilson", "strat_wilsonc") && denom[1] != "n") { + stop( + "Stratified methods only support 'n' as the denominator (denom). ", + "Consider adding negative responders directly to the dataset." + ) + } + + denom <- match.arg(denom) %>% + switch( + n = length(rsp), + N_row = .N_row, + N_col = .N_col + ) + n_rsp <- sum(rsp) + p_hat <- ifelse(denom > 0, n_rsp / denom, 0) prop_ci <- switch(method, - "clopper-pearson" = prop_clopper_pearson(rsp, conf_level), - "wilson" = prop_wilson(rsp, conf_level), - "wilsonc" = prop_wilson(rsp, conf_level, correct = TRUE), + "clopper-pearson" = prop_clopper_pearson(rsp, n = denom, conf_level), + "wilson" = prop_wilson(rsp, n = denom, conf_level), + "wilsonc" = prop_wilson(rsp, n = denom, conf_level, correct = TRUE), "strat_wilson" = prop_strat_wilson(rsp, strata, weights, conf_level, max_iterations, correct = FALSE)$conf_int, "strat_wilsonc" = prop_strat_wilson(rsp, strata, weights, conf_level, max_iterations, correct = TRUE)$conf_int, - "wald" = prop_wald(rsp, conf_level), - "waldcc" = prop_wald(rsp, conf_level, correct = TRUE), - "agresti-coull" = prop_agresti_coull(rsp, conf_level), - "jeffreys" = prop_jeffreys(rsp, conf_level) + "wald" = prop_wald(rsp, n = denom, conf_level), + "waldcc" = prop_wald(rsp, n = denom, conf_level, correct = TRUE), + "agresti-coull" = prop_agresti_coull(rsp, n = denom, conf_level), + "jeffreys" = prop_jeffreys(rsp, n = denom, conf_level) ) list( - "n_prop" = formatters::with_label(c(n, p_hat), "Responders"), + "n_prop" = formatters::with_label(c(n_rsp, p_hat), "Responders"), "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = d_proportion(conf_level, method, long = long)) ) } @@ -290,10 +309,10 @@ NULL #' prop_wilson(rsp, conf_level = 0.9) #' #' @export -prop_wilson <- function(rsp, conf_level, correct = FALSE) { +prop_wilson <- function(rsp, n = length(rsp), conf_level, correct = FALSE) { y <- stats::prop.test( sum(rsp), - length(rsp), + n, correct = correct, conf.level = conf_level ) @@ -424,15 +443,17 @@ prop_strat_wilson <- function(rsp, #' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()]. #' Also referred to as the `exact` method. #' +#' @param n (`count`)\cr number of participants (if `denom = "N_col"`) or the number of responders +#' (if `denom = "n"`, the default). +#' #' @examples #' prop_clopper_pearson(rsp, conf_level = .95) #' #' @export -prop_clopper_pearson <- function(rsp, - conf_level) { +prop_clopper_pearson <- function(rsp, n = length(rsp), conf_level) { y <- stats::binom.test( x = sum(rsp), - n = length(rsp), + n = n, conf.level = conf_level ) as.numeric(y$conf.int) @@ -448,9 +469,8 @@ prop_clopper_pearson <- function(rsp, #' prop_wald(rsp, conf_level = 0.95, correct = TRUE) #' #' @export -prop_wald <- function(rsp, conf_level, correct = FALSE) { - n <- length(rsp) - p_hat <- mean(rsp) +prop_wald <- function(rsp, n = length(rsp), conf_level, correct = FALSE) { + p_hat <- ifelse(n > 0, sum(rsp) / n, 0) z <- stats::qnorm((1 + conf_level) / 2) q_hat <- 1 - p_hat correct <- if (correct) 1 / (2 * n) else 0 @@ -469,8 +489,7 @@ prop_wald <- function(rsp, conf_level, correct = FALSE) { #' prop_agresti_coull(rsp, conf_level = 0.95) #' #' @export -prop_agresti_coull <- function(rsp, conf_level) { - n <- length(rsp) +prop_agresti_coull <- function(rsp, n = length(rsp), conf_level) { x_sum <- sum(rsp) z <- stats::qnorm((1 + conf_level) / 2) @@ -495,9 +514,7 @@ prop_agresti_coull <- function(rsp, conf_level) { #' prop_jeffreys(rsp, conf_level = 0.95) #' #' @export -prop_jeffreys <- function(rsp, - conf_level) { - n <- length(rsp) +prop_jeffreys <- function(rsp, n = length(rsp), conf_level) { x_sum <- sum(rsp) alpha <- 1 - conf_level diff --git a/man/estimate_proportion.Rd b/man/estimate_proportion.Rd index 6ac375550d..c9200cdbe9 100644 --- a/man/estimate_proportion.Rd +++ b/man/estimate_proportion.Rd @@ -38,6 +38,7 @@ s_proportion( max_iterations = 50, variables = list(strata = NULL), long = FALSE, + denom = c("n", "N_col", "N_row"), ... ) @@ -110,6 +111,13 @@ variable name in \code{.var}.} \item{.var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested by a statistics function.} + +\item{denom}{(\code{string})\cr choice of denominator for proportion. Options are: +\itemize{ +\item \code{n}: number of values in this row and column intersection. +\item \code{N_row}: total number of values in this row across columns. +\item \code{N_col}: total number of values in this column across rows. +}} } \value{ \itemize{ diff --git a/man/h_proportions.Rd b/man/h_proportions.Rd index d9859bd65a..da86e38483 100644 --- a/man/h_proportions.Rd +++ b/man/h_proportions.Rd @@ -10,7 +10,7 @@ \alias{prop_jeffreys} \title{Helper functions for calculating proportion confidence intervals} \usage{ -prop_wilson(rsp, conf_level, correct = FALSE) +prop_wilson(rsp, n = length(rsp), conf_level, correct = FALSE) prop_strat_wilson( rsp, @@ -21,17 +21,20 @@ prop_strat_wilson( correct = FALSE ) -prop_clopper_pearson(rsp, conf_level) +prop_clopper_pearson(rsp, n = length(rsp), conf_level) -prop_wald(rsp, conf_level, correct = FALSE) +prop_wald(rsp, n = length(rsp), conf_level, correct = FALSE) -prop_agresti_coull(rsp, conf_level) +prop_agresti_coull(rsp, n = length(rsp), conf_level) -prop_jeffreys(rsp, conf_level) +prop_jeffreys(rsp, n = length(rsp), conf_level) } \arguments{ \item{rsp}{(\code{logical})\cr vector indicating whether each subject is a responder or not.} +\item{n}{(\code{count})\cr number of participants (if \code{denom = "N_col"}) or the number of responders +(if \code{denom = "n"}, the default).} + \item{conf_level}{(\code{proportion})\cr confidence level of the interval.} \item{correct}{(\code{flag})\cr whether to apply continuity correction.} diff --git a/tests/testthat/_snaps/estimate_proportion.md b/tests/testthat/_snaps/estimate_proportion.md index 95f9a2b75e..cffc32e630 100644 --- a/tests/testthat/_snaps/estimate_proportion.md +++ b/tests/testthat/_snaps/estimate_proportion.md @@ -217,3 +217,24 @@ [1,] "108.00 (54.00%)" [2,] "(46.7669, 60.5951)" +# `estimate_proportion` works with different denominators + + Code + res + Output + all obs + (N=200) + ————————————————————————————————————————————— + Responders 108 (54.0%) + 95% CI (Wald, with correction) (46.8, 61.2) + +--- + + Code + res + Output + all obs + ————————————————————————————————————————————— + Responders 108 (54.0%) + 95% CI (Wald, with correction) (46.8, 61.2) + diff --git a/tests/testthat/test-estimate_proportion.R b/tests/testthat/test-estimate_proportion.R index 28a463053d..7695912eac 100644 --- a/tests/testthat/test-estimate_proportion.R +++ b/tests/testthat/test-estimate_proportion.R @@ -339,3 +339,46 @@ testthat::test_that( testthat::expect_snapshot(res) } ) +testthat::test_that("`estimate_proportion` works with different denominators", { + set.seed(1) + + # Data loading and processing + anl <- tern_ex_adrs %>% + dplyr::filter(PARAMCD == "BESRSPI") %>% + dplyr::mutate(DTHFL = DTHFL == "Y") # Death flag yes + + # Changing other variables (weights and max_nt) + n_ws <- length(unique(anl$SEX)) * length(unique(anl$STRATA1)) + expect_error( + { + result <- basic_table() %>% + estimate_proportion( + vars = "DTHFL", + method = "strat_wilson", + variables = list(strata = c("SEX", "STRATA1")), + weights = rep(1 / n_ws, n_ws), + denom = "N_cols" + ) %>% + build_table(anl) + }, + "Stratified methods only support" + ) + + result <- basic_table() %>% + estimate_proportion( + vars = "DTHFL", + denom = "N_col" + ) %>% + build_table(anl, col_counts = c(200)) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) + + result <- basic_table() %>% + estimate_proportion( + vars = "DTHFL", + denom = "n" + ) %>% + build_table(anl) + res <- testthat::expect_silent(result) + testthat::expect_snapshot(res) +})