From 96913bf828f99fb877227a3722fb2248cd9234b5 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Mon, 3 Feb 2025 18:47:38 +0100 Subject: [PATCH 01/12] init work on prop_diff --- R/prop_diff.R | 134 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 108 insertions(+), 26 deletions(-) diff --git a/R/prop_diff.R b/R/prop_diff.R index bcf28c6272..efdbb74396 100644 --- a/R/prop_diff.R +++ b/R/prop_diff.R @@ -64,7 +64,8 @@ s_proportion_diff <- function(df, "ha", "newcombe", "newcombecc", "strat_newcombe", "strat_newcombecc" ), - weights_method = "cmh") { + weights_method = "cmh", + ...) { method <- match.arg(method) if (is.null(variables$strata) && checkmate::test_subset(method, c("cmh", "strat_newcombe", "strat_newcombecc"))) { stop(paste( @@ -151,6 +152,7 @@ s_proportion_diff <- function(df, #' @examples #' a_proportion_diff( #' df = subset(dta, grp == "A"), +#' .stats = c("diff"), #' .var = "rsp", #' .ref_group = subset(dta, grp == "B"), #' .in_ref_col = FALSE, @@ -159,11 +161,73 @@ s_proportion_diff <- function(df, #' ) #' #' @export -a_proportion_diff <- make_afun( - s_proportion_diff, - .formats = c(diff = "xx.x", diff_ci = "(xx.x, xx.x)"), - .indent_mods = c(diff = 0L, diff_ci = 1L) -) +a_proportion_diff <- function(df, + ..., + .stats = NULL, + .stat_names = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL) { + dots_extra_args <- list(...) + + # Check if there are user-defined functions + default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) + .stats <- default_and_custom_stats_list$default_stats + custom_stat_functions <- default_and_custom_stats_list$custom_stats + + # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) + extra_afun_params <- retrieve_extra_afun_params( + names(dots_extra_args$.additional_fun_parameters) + ) + dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore + + # Main statistical functions application + x_stats <- .apply_stat_functions( + default_stat_fnc = s_proportion_diff, + custom_stat_fnc_list = custom_stat_functions, + args_list = c( + df = list(df), + extra_afun_params, + dots_extra_args + ) + ) + + # Fill in with stats defaults if needed + met_grp <- get_stats("estimate_proportion_diff", stats_in = .stats) + + x_stats <- x_stats[.stats] + + # Fill in formats/indents/labels with custom input and defaults + .formats <- get_formats_from_stats(.stats, .formats) + .indent_mods <- get_indents_from_stats(.stats, .indent_mods) + if (is.null(.labels)) { + .labels <- sapply(x_stats, attr, "label") + } + .labels <- get_labels_from_stats(.stats, .labels) + + # Auto format handling + .formats <- apply_auto_formatting( + .formats, + x_stats, + extra_afun_params$.df_row, + extra_afun_params$.var + ) + + # Get and check statistical names from defaults + .stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats + + # Empty result when no statistics are calculated (reference group) + x_stats <- lapply(x_stats, function(xi) if(!nzchar(xi)) NULL else xi) + + in_rows( + .list = x_stats, + .formats = .formats, + .names = names(.labels), + .stat_names = .stat_names, + .labels = .labels %>% .unlist_keep_nulls(), + .indent_mods = .indent_mods %>% .unlist_keep_nulls() + ) +} #' @describeIn prop_diff Layout-creating function which can take statistics function arguments #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. @@ -198,6 +262,14 @@ a_proportion_diff <- make_afun( #' @order 2 estimate_proportion_diff <- function(lyt, vars, + var_labels = vars, + na_str = default_na_str(), + nested = TRUE, + show_labels = "default", + table_names = vars, + section_div = NA_character_, + ..., + na_rm = TRUE, variables = list(strata = NULL), conf_level = 0.95, method = c( @@ -206,38 +278,48 @@ estimate_proportion_diff <- function(lyt, "strat_newcombe", "strat_newcombecc" ), weights_method = "cmh", - na_str = default_na_str(), - nested = TRUE, - ..., - var_labels = vars, - show_labels = "hidden", - table_names = vars, - .stats = NULL, - .formats = NULL, + .stats = c("diff", "diff_ci"), + .stat_names = NULL, + .formats = c(diff = "xx.x", diff_ci = "(xx.x, xx.x)"), .labels = NULL, - .indent_mods = NULL) { + .indent_mods = c(diff = 0L, diff_ci = 1L)) { + # Depending on main functions extra_args <- list( - variables = variables, conf_level = conf_level, method = method, weights_method = weights_method, ... + "na_rm" = na_rm, + "variables" = variables, + "conf_level" = conf_level, + "method" = method, + "weights_method" = weights_method, + ... ) - afun <- make_afun( - a_proportion_diff, - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods + # Needed defaults + if (!is.null(.stats)) extra_args[[".stats"]] <- .stats + if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names + if (!is.null(.formats)) extra_args[[".formats"]] <- .formats + if (!is.null(.labels)) extra_args[[".labels"]] <- .labels + if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods + + # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) + extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) + formals(a_proportion_diff) <- c( + formals(a_proportion_diff), + extra_args[[".additional_fun_parameters"]] ) + # Main {rtables} structural call analyze( - lyt, - vars, - afun = afun, + lyt = lyt, + vars = vars, var_labels = var_labels, + afun = a_proportion_diff, na_str = na_str, + inclNAs = !na_rm, nested = nested, extra_args = extra_args, show_labels = show_labels, - table_names = table_names + table_names = table_names, + section_div = section_div ) } From cf8da416e16d4ee0b6b29d6467b4f8e9b1b2d956 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Tue, 4 Feb 2025 12:27:36 +0100 Subject: [PATCH 02/12] fix ref --- R/prop_diff.R | 6 ++---- tern.Rproj | 1 - tests/testthat/test-prop_diff.R | 4 ++-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/R/prop_diff.R b/R/prop_diff.R index efdbb74396..e0afe452c6 100644 --- a/R/prop_diff.R +++ b/R/prop_diff.R @@ -73,7 +73,7 @@ s_proportion_diff <- function(df, "permitted. Please choose a different method." )) } - y <- list(diff = "", diff_ci = "") + y <- list(diff = character(), diff_ci = character()) if (!.in_ref_col) { rsp <- c(.ref_group[[.var]], df[[.var]]) @@ -215,9 +215,7 @@ a_proportion_diff <- function(df, # Get and check statistical names from defaults .stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats - - # Empty result when no statistics are calculated (reference group) - x_stats <- lapply(x_stats, function(xi) if(!nzchar(xi)) NULL else xi) + .stat_names <- paste0(.stat_names, "_", dots_extra_args$method) in_rows( .list = x_stats, diff --git a/tern.Rproj b/tern.Rproj index a8fcecc6fb..4f69393588 100644 --- a/tern.Rproj +++ b/tern.Rproj @@ -1,5 +1,4 @@ Version: 1.0 -ProjectId: 9441de74-2fb5-42be-9c7a-c2a704b8aa93 RestoreWorkspace: Default SaveWorkspace: Default diff --git a/tests/testthat/test-prop_diff.R b/tests/testthat/test-prop_diff.R index c6cdcc92db..c5296fecfa 100644 --- a/tests/testthat/test-prop_diff.R +++ b/tests/testthat/test-prop_diff.R @@ -262,7 +262,7 @@ testthat::test_that("`estimate_proportion_diff` and cmh is compatible with `rtab vars = "rsp", variables = list(strata = c("f1", "f2")), conf_level = 0.90, - .formats = c("xx.xxxx", "(xx.xxxx, xx.xxxx)"), + .formats = c(diff = "xx.xxxx", diff_ci = "(xx.xxxx, xx.xxxx)"), method = "cmh" ) @@ -292,7 +292,7 @@ testthat::test_that("`estimate_proportion_diff` and strat_newcombe is compatible vars = "rsp", variables = list(strata = c("f1", "f2")), conf_level = 0.95, - .formats = c("xx.xx", "(xx.xx, xx.xx)"), + .formats = c(diff = "xx.xx", diff_ci = "(xx.xx, xx.xx)"), method = "strat_newcombe" ) result <- build_table(l, df = dta) From acb505927e354b77399f40f2017c093fa601298e Mon Sep 17 00:00:00 2001 From: Melkiades Date: Fri, 7 Feb 2025 16:29:29 +0100 Subject: [PATCH 03/12] as_factor_keep_attributes is open now --- NAMESPACE | 1 + R/utils_factor.R | 62 ++++++++-------- man/as_factor_keep_attributes.Rd | 31 -------- man/combine_levels.Rd | 30 -------- man/cut_quantile_bins.Rd | 4 +- man/factor_utils.Rd | 124 +++++++++++++++++++++++++++++++ man/fct_collapse_only.Rd | 39 ---------- man/fct_discard.Rd | 25 ------- man/fct_explicit_na_if.Rd | 31 -------- man/prop_diff.Rd | 71 ++++++++++-------- 10 files changed, 202 insertions(+), 216 deletions(-) delete mode 100644 man/as_factor_keep_attributes.Rd delete mode 100644 man/combine_levels.Rd create mode 100644 man/factor_utils.Rd delete mode 100644 man/fct_collapse_only.Rd delete mode 100644 man/fct_discard.Rd delete mode 100644 man/fct_explicit_na_if.Rd diff --git a/NAMESPACE b/NAMESPACE index a2ea4dbd23..4b6ea88c32 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,7 @@ export(analyze_vars_in_cols) export(append_varlabels) export(arrange_grobs) export(as.rtable) +export(as_factor_keep_attributes) export(combine_counts) export(combine_groups) export(combine_levels) diff --git a/R/utils_factor.R b/R/utils_factor.R index 669bb93279..0e64704a24 100644 --- a/R/utils_factor.R +++ b/R/utils_factor.R @@ -1,14 +1,23 @@ -#' Combine factor levels +#' Factor utilities #' #' @description `r lifecycle::badge("stable")` #' -#' Combine specified old factor Levels in a single new level. +#' A collection of utility functions for factors. +#' +#' @param x (`factor`)\cr factor variable or object to convert (for `as_factor_keep_attributes`). +#' +#' @seealso [cut_quantile_bins()] for splitting numeric vectors into quantile bins. +#' +#' @name factor_utils +NULL + +#' @describeIn factor_utils Combine specified old factor Levels in a single new level. #' -#' @param x (`factor`)\cr factor variable. #' @param levels (`character`)\cr level names to be combined. #' @param new_level (`string`)\cr name of new level. #' -#' @return A `factor` with the new levels. +#' @return +#' * `combine_levels`: A `factor` with the new levels. #' #' @examples #' x <- factor(letters[1:5], levels = letters[5:1]) @@ -32,18 +41,23 @@ combine_levels <- function(x, levels, new_level = paste(levels, collapse = "/")) #' Conversion of a vector to a factor #' -#' Converts `x` to a factor and keeps its attributes. Warns appropriately such that the user +#' @describeIn factor_utils Converts `x` to a factor and keeps its attributes. Warns appropriately such that the user #' can decide whether they prefer converting to factor manually (e.g. for full control of #' factor levels). #' -#' @param x (`vector`)\cr object to convert. #' @param x_name (`string`)\cr name of `x`. #' @param na_level (`string`)\cr the explicit missing level which should be used when converting a character vector. #' @param verbose (`flag`)\cr defaults to `TRUE`. It prints out warnings and messages. #' -#' @return A `factor` with same attributes (except class) as `x`. Does not modify `x` if already a `factor`. +#' @return +#' * `as_factor_keep_attributes`: A `factor` with same attributes (except class) as `x`. Does not modify `x` if already a `factor`. #' -#' @keywords internal +#' @examples +#' a_chr_with_labels <- c("a", "b", NA) +#' attr(a_chr_with_labels, "label") <- "A character vector with labels" +#' as_factor_keep_attributes(a_chr_with_labels) +#' +#' @export as_factor_keep_attributes <- function(x, x_name = deparse(substitute(x)), na_level = "", @@ -132,7 +146,8 @@ bins_percent_labels <- function(probs, #' @param type (`integer(1)`)\cr type of quantiles to use, see [stats::quantile()] for details. #' @param ordered (`flag`)\cr should the result be an ordered factor. #' -#' @return A `factor` variable with appropriately-labeled bins as levels. +#' @return +#' * `cut_quantile_bins`: A `factor` variable with appropriately-labeled bins as levels. #' #' @note Intervals are closed on the right side. That is, the first bin is the interval #' `[-Inf, q1]` where `q1` is the first quantile, the second bin is then `(q1, q2]`, etc., @@ -192,16 +207,12 @@ cut_quantile_bins <- function(x, ) } -#' Discard specified levels of a factor +#' @describeIn factor_utils This discards the observations as well as the levels specified from a factor. #' -#' @description `r lifecycle::badge("stable")` -#' -#' This discards the observations as well as the levels specified from a factor. -#' -#' @param x (`factor`)\cr the original factor. #' @param discard (`character`)\cr levels to discard. #' -#' @return A modified `factor` with observations as well as levels from `discard` dropped. +#' @return +#' * `fct_discard`: A modified `factor` with observations as well as levels from `discard` dropped. #' #' @examples #' fct_discard(factor(c("a", "b", "c")), "c") @@ -215,18 +226,14 @@ fct_discard <- function(x, discard) { factor(new_obs, levels = new_levels) } -#' Insertion of explicit missing values in a factor -#' -#' @description `r lifecycle::badge("stable")` -#' -#' This inserts explicit missing values in a factor based on a condition. Additionally, +#' @describeIn factor_utils This inserts explicit missing values in a factor based on a condition. Additionally, #' existing `NA` values will be explicitly converted to given `na_level`. #' -#' @param x (`factor`)\cr the original factor. #' @param condition (`logical`)\cr positions at which to insert missing values. #' @param na_level (`string`)\cr which level to use for missing values. #' -#' @return A modified `factor` with inserted and existing `NA` converted to `na_level`. +#' @return +#' * `fct_explicit_na_if`: A modified `factor` with inserted and existing `NA` converted to `na_level`. #' #' @seealso [forcats::fct_na_value_to_level()] which is used internally. #' @@ -242,11 +249,7 @@ fct_explicit_na_if <- function(x, condition, na_level = "") { forcats::fct_drop(x, only = na_level) } -#' Collapse factor levels and keep only those new group levels -#' -#' @description `r lifecycle::badge("stable")` -#' -#' This collapses levels and only keeps those new group levels, in the order provided. +#' @describeIn factor_utils This collapses levels and only keeps those new group levels, in the order provided. #' The returned factor has levels in the order given, with the possible missing level last (this will #' only be included if there are missing values). #' @@ -256,7 +259,8 @@ fct_explicit_na_if <- function(x, condition, na_level = "") { #' @param .na_level (`string`)\cr which level to use for other levels, which should be missing in the #' new factor. Note that this level must not be contained in the new levels specified in `...`. #' -#' @return A modified `factor` with collapsed levels. Values and levels which are not included +#' @return +#' * `fct_collapse_only`: A modified `factor` with collapsed levels. Values and levels which are not included #' in the given `character` vector input will be set to the missing level `.na_level`. #' #' @note Any existing `NA`s in the input vector will not be replaced by the missing level. If needed, diff --git a/man/as_factor_keep_attributes.Rd b/man/as_factor_keep_attributes.Rd deleted file mode 100644 index 02094fcd74..0000000000 --- a/man/as_factor_keep_attributes.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_factor.R -\name{as_factor_keep_attributes} -\alias{as_factor_keep_attributes} -\title{Conversion of a vector to a factor} -\usage{ -as_factor_keep_attributes( - x, - x_name = deparse(substitute(x)), - na_level = "", - verbose = TRUE -) -} -\arguments{ -\item{x}{(\code{vector})\cr object to convert.} - -\item{x_name}{(\code{string})\cr name of \code{x}.} - -\item{na_level}{(\code{string})\cr the explicit missing level which should be used when converting a character vector.} - -\item{verbose}{(\code{flag})\cr defaults to \code{TRUE}. It prints out warnings and messages.} -} -\value{ -A \code{factor} with same attributes (except class) as \code{x}. Does not modify \code{x} if already a \code{factor}. -} -\description{ -Converts \code{x} to a factor and keeps its attributes. Warns appropriately such that the user -can decide whether they prefer converting to factor manually (e.g. for full control of -factor levels). -} -\keyword{internal} diff --git a/man/combine_levels.Rd b/man/combine_levels.Rd deleted file mode 100644 index b6d6fde6dc..0000000000 --- a/man/combine_levels.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_factor.R -\name{combine_levels} -\alias{combine_levels} -\title{Combine factor levels} -\usage{ -combine_levels(x, levels, new_level = paste(levels, collapse = "/")) -} -\arguments{ -\item{x}{(\code{factor})\cr factor variable.} - -\item{levels}{(\code{character})\cr level names to be combined.} - -\item{new_level}{(\code{string})\cr name of new level.} -} -\value{ -A \code{factor} with the new levels. -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} - -Combine specified old factor Levels in a single new level. -} -\examples{ -x <- factor(letters[1:5], levels = letters[5:1]) -combine_levels(x, levels = c("a", "b")) - -combine_levels(x, c("e", "b")) - -} diff --git a/man/cut_quantile_bins.Rd b/man/cut_quantile_bins.Rd index d2d7687fd0..7d60a208d8 100644 --- a/man/cut_quantile_bins.Rd +++ b/man/cut_quantile_bins.Rd @@ -29,7 +29,9 @@ probabilities in \code{probs}, then this must be \code{n + 1} long.} \item{ordered}{(\code{flag})\cr should the result be an ordered factor.} } \value{ -A \code{factor} variable with appropriately-labeled bins as levels. +\itemize{ +\item \code{cut_quantile_bins}: A \code{factor} variable with appropriately-labeled bins as levels. +} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} diff --git a/man/factor_utils.Rd b/man/factor_utils.Rd new file mode 100644 index 0000000000..ce6cf1a829 --- /dev/null +++ b/man/factor_utils.Rd @@ -0,0 +1,124 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_factor.R +\name{factor_utils} +\alias{factor_utils} +\alias{combine_levels} +\alias{as_factor_keep_attributes} +\alias{fct_discard} +\alias{fct_explicit_na_if} +\alias{fct_collapse_only} +\title{Factor utilities} +\usage{ +combine_levels(x, levels, new_level = paste(levels, collapse = "/")) + +as_factor_keep_attributes( + x, + x_name = deparse(substitute(x)), + na_level = "", + verbose = TRUE +) + +fct_discard(x, discard) + +fct_explicit_na_if(x, condition, na_level = "") + +fct_collapse_only(.f, ..., .na_level = "") +} +\arguments{ +\item{x}{(\code{factor})\cr factor variable or object to convert (for \code{as_factor_keep_attributes}).} + +\item{levels}{(\code{character})\cr level names to be combined.} + +\item{new_level}{(\code{string})\cr name of new level.} + +\item{x_name}{(\code{string})\cr name of \code{x}.} + +\item{na_level}{(\code{string})\cr which level to use for missing values.} + +\item{verbose}{(\code{flag})\cr defaults to \code{TRUE}. It prints out warnings and messages.} + +\item{discard}{(\code{character})\cr levels to discard.} + +\item{condition}{(\code{logical})\cr positions at which to insert missing values.} + +\item{.f}{(\code{factor} or \code{character})\cr original vector.} + +\item{...}{(named \code{character})\cr levels in each vector provided will be collapsed into +the new level given by the respective name.} + +\item{.na_level}{(\code{string})\cr which level to use for other levels, which should be missing in the +new factor. Note that this level must not be contained in the new levels specified in \code{...}.} +} +\value{ +\itemize{ +\item \code{combine_levels}: A \code{factor} with the new levels. +} + +\itemize{ +\item \code{as_factor_keep_attributes}: A \code{factor} with same attributes (except class) as \code{x}. Does not modify \code{x} if already a \code{factor}. +} + +\itemize{ +\item \code{fct_discard}: A modified \code{factor} with observations as well as levels from \code{discard} dropped. +} + +\itemize{ +\item \code{fct_explicit_na_if}: A modified \code{factor} with inserted and existing \code{NA} converted to \code{na_level}. +} + +\itemize{ +\item \code{fct_collapse_only}: A modified \code{factor} with collapsed levels. Values and levels which are not included +in the given \code{character} vector input will be set to the missing level \code{.na_level}. +} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} + +A collection of utility functions for factors. +} +\section{Functions}{ +\itemize{ +\item \code{combine_levels()}: Combine specified old factor Levels in a single new level. + +\item \code{as_factor_keep_attributes()}: Converts \code{x} to a factor and keeps its attributes. Warns appropriately such that the user +can decide whether they prefer converting to factor manually (e.g. for full control of +factor levels). + +\item \code{fct_discard()}: This discards the observations as well as the levels specified from a factor. + +\item \code{fct_explicit_na_if()}: This inserts explicit missing values in a factor based on a condition. Additionally, +existing \code{NA} values will be explicitly converted to given \code{na_level}. + +\item \code{fct_collapse_only()}: This collapses levels and only keeps those new group levels, in the order provided. +The returned factor has levels in the order given, with the possible missing level last (this will +only be included if there are missing values). + +}} +\note{ +Any existing \code{NA}s in the input vector will not be replaced by the missing level. If needed, +\code{\link[=explicit_na]{explicit_na()}} can be called separately on the result. +} +\examples{ +x <- factor(letters[1:5], levels = letters[5:1]) +combine_levels(x, levels = c("a", "b")) + +combine_levels(x, c("e", "b")) + +a_chr_with_labels <- c("a", "b", NA) +attr(a_chr_with_labels, "label") <- "A character vector with labels" +as_factor_keep_attributes(a_chr_with_labels) + +fct_discard(factor(c("a", "b", "c")), "c") + +fct_explicit_na_if(factor(c("a", "b", NA)), c(TRUE, FALSE, FALSE)) + +fct_collapse_only(factor(c("a", "b", "c", "d")), TRT = "b", CTRL = c("c", "d")) + +} +\seealso{ +\code{\link[=cut_quantile_bins]{cut_quantile_bins()}} for splitting numeric vectors into quantile bins. + +\code{\link[forcats:fct_na_value_to_level]{forcats::fct_na_value_to_level()}} which is used internally. + +\code{\link[forcats:fct_collapse]{forcats::fct_collapse()}}, \code{\link[forcats:fct_relevel]{forcats::fct_relevel()}} which are used internally. +} diff --git a/man/fct_collapse_only.Rd b/man/fct_collapse_only.Rd deleted file mode 100644 index fb4785c099..0000000000 --- a/man/fct_collapse_only.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_factor.R -\name{fct_collapse_only} -\alias{fct_collapse_only} -\title{Collapse factor levels and keep only those new group levels} -\usage{ -fct_collapse_only(.f, ..., .na_level = "") -} -\arguments{ -\item{.f}{(\code{factor} or \code{character})\cr original vector.} - -\item{...}{(named \code{character})\cr levels in each vector provided will be collapsed into -the new level given by the respective name.} - -\item{.na_level}{(\code{string})\cr which level to use for other levels, which should be missing in the -new factor. Note that this level must not be contained in the new levels specified in \code{...}.} -} -\value{ -A modified \code{factor} with collapsed levels. Values and levels which are not included -in the given \code{character} vector input will be set to the missing level \code{.na_level}. -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} - -This collapses levels and only keeps those new group levels, in the order provided. -The returned factor has levels in the order given, with the possible missing level last (this will -only be included if there are missing values). -} -\note{ -Any existing \code{NA}s in the input vector will not be replaced by the missing level. If needed, -\code{\link[=explicit_na]{explicit_na()}} can be called separately on the result. -} -\examples{ -fct_collapse_only(factor(c("a", "b", "c", "d")), TRT = "b", CTRL = c("c", "d")) - -} -\seealso{ -\code{\link[forcats:fct_collapse]{forcats::fct_collapse()}}, \code{\link[forcats:fct_relevel]{forcats::fct_relevel()}} which are used internally. -} diff --git a/man/fct_discard.Rd b/man/fct_discard.Rd deleted file mode 100644 index 31b0fd13c8..0000000000 --- a/man/fct_discard.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_factor.R -\name{fct_discard} -\alias{fct_discard} -\title{Discard specified levels of a factor} -\usage{ -fct_discard(x, discard) -} -\arguments{ -\item{x}{(\code{factor})\cr the original factor.} - -\item{discard}{(\code{character})\cr levels to discard.} -} -\value{ -A modified \code{factor} with observations as well as levels from \code{discard} dropped. -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} - -This discards the observations as well as the levels specified from a factor. -} -\examples{ -fct_discard(factor(c("a", "b", "c")), "c") - -} diff --git a/man/fct_explicit_na_if.Rd b/man/fct_explicit_na_if.Rd deleted file mode 100644 index d38677d05a..0000000000 --- a/man/fct_explicit_na_if.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_factor.R -\name{fct_explicit_na_if} -\alias{fct_explicit_na_if} -\title{Insertion of explicit missing values in a factor} -\usage{ -fct_explicit_na_if(x, condition, na_level = "") -} -\arguments{ -\item{x}{(\code{factor})\cr the original factor.} - -\item{condition}{(\code{logical})\cr positions at which to insert missing values.} - -\item{na_level}{(\code{string})\cr which level to use for missing values.} -} -\value{ -A modified \code{factor} with inserted and existing \code{NA} converted to \code{na_level}. -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} - -This inserts explicit missing values in a factor based on a condition. Additionally, -existing \code{NA} values will be explicitly converted to given \code{na_level}. -} -\examples{ -fct_explicit_na_if(factor(c("a", "b", NA)), c(TRUE, FALSE, FALSE)) - -} -\seealso{ -\code{\link[forcats:fct_na_value_to_level]{forcats::fct_na_value_to_level()}} which is used internally. -} diff --git a/man/prop_diff.Rd b/man/prop_diff.Rd index 658a31c49d..46f416893a 100644 --- a/man/prop_diff.Rd +++ b/man/prop_diff.Rd @@ -10,21 +10,24 @@ estimate_proportion_diff( lyt, vars, + var_labels = vars, + na_str = default_na_str(), + nested = TRUE, + show_labels = "default", + table_names = vars, + section_div = NA_character_, + ..., + na_rm = TRUE, variables = list(strata = NULL), conf_level = 0.95, method = c("waldcc", "wald", "cmh", "ha", "newcombe", "newcombecc", "strat_newcombe", "strat_newcombecc"), weights_method = "cmh", - na_str = default_na_str(), - nested = TRUE, - ..., - var_labels = vars, - show_labels = "hidden", - table_names = vars, - .stats = NULL, - .formats = NULL, + .stats = c("diff", "diff_ci"), + .stat_names = NULL, + .formats = c(diff = "xx.x", diff_ci = "(xx.x, xx.x)"), .labels = NULL, - .indent_mods = NULL + .indent_mods = c(diff = 0L, diff_ci = 1L) ) s_proportion_diff( @@ -36,19 +39,18 @@ s_proportion_diff( conf_level = 0.95, method = c("waldcc", "wald", "cmh", "ha", "newcombe", "newcombecc", "strat_newcombe", "strat_newcombecc"), - weights_method = "cmh" + weights_method = "cmh", + ... ) a_proportion_diff( df, - .var, - .ref_group, - .in_ref_col, - variables = list(strata = NULL), - conf_level = 0.95, - method = c("waldcc", "wald", "cmh", "ha", "newcombe", "newcombecc", "strat_newcombe", - "strat_newcombecc"), - weights_method = "cmh" + ..., + .stats = NULL, + .stat_names = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL ) } \arguments{ @@ -56,14 +58,7 @@ a_proportion_diff( \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} -\item{variables}{(named \code{list} of \code{string})\cr list of additional analysis variables.} - -\item{conf_level}{(\code{proportion})\cr confidence level of the interval.} - -\item{method}{(\code{string})\cr the method used for the confidence interval estimation.} - -\item{weights_method}{(\code{string})\cr weights method. Can be either \code{"cmh"} or \code{"heuristic"} -and directs the way weights are estimated.} +\item{var_labels}{(\code{character})\cr variable labels.} \item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} @@ -71,19 +66,34 @@ and directs the way weights are estimated.} possible (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split. underneath analyses, which is not allowed.} -\item{...}{additional arguments for the lower level functions.} - -\item{var_labels}{(\code{character})\cr variable labels.} - \item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} \item{table_names}{(\code{character})\cr this can be customized in the case that the same \code{vars} are analyzed multiple times, to avoid warnings from \code{rtables}.} +\item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group +defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} + +\item{...}{additional arguments for the lower level functions.} + +\item{na_rm}{(\code{flag})\cr whether \code{NA} values should be removed from \code{x} prior to analysis.} + +\item{variables}{(named \code{list} of \code{string})\cr list of additional analysis variables.} + +\item{conf_level}{(\code{proportion})\cr confidence level of the interval.} + +\item{method}{(\code{string})\cr the method used for the confidence interval estimation.} + +\item{weights_method}{(\code{string})\cr weights method. Can be either \code{"cmh"} or \code{"heuristic"} +and directs the way weights are estimated.} + \item{.stats}{(\code{character})\cr statistics to select for the table. Options are: \verb{'diff', 'diff_ci'}} +\item{.stat_names}{(\code{character})\cr names of the statistics that are passed directly to name single statistics +(\code{.stats}). This option is visible when producing \code{\link[rtables:data.frame_export]{rtables::as_result_df()}} with \code{make_ard = TRUE}.} + \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more information on the \code{"auto"} setting.} @@ -183,6 +193,7 @@ s_proportion_diff( a_proportion_diff( df = subset(dta, grp == "A"), + .stats = c("diff"), .var = "rsp", .ref_group = subset(dta, grp == "B"), .in_ref_col = FALSE, From b1e08551b0e9532bbbe3d2f25f55c5ff316df931 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Fri, 14 Feb 2025 15:47:07 +0100 Subject: [PATCH 04/12] fix --- R/prop_diff.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/prop_diff.R b/R/prop_diff.R index e0afe452c6..68c9e4096b 100644 --- a/R/prop_diff.R +++ b/R/prop_diff.R @@ -193,7 +193,10 @@ a_proportion_diff <- function(df, ) # Fill in with stats defaults if needed - met_grp <- get_stats("estimate_proportion_diff", stats_in = .stats) + .stats <- c( + get_stats("estimate_proportion_diff", stats_in = .stats), + names(custom_stat_functions) + ) x_stats <- x_stats[.stats] From 4b1e8e0f923e67da60c27b2279f1b626073e6886 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Fri, 14 Feb 2025 16:16:02 +0100 Subject: [PATCH 05/12] add --- R/prop_diff_test.R | 136 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 111 insertions(+), 25 deletions(-) diff --git a/R/prop_diff_test.R b/R/prop_diff_test.R index 5af48ada18..ac97846dd6 100644 --- a/R/prop_diff_test.R +++ b/R/prop_diff_test.R @@ -32,9 +32,10 @@ s_test_proportion_diff <- function(df, .ref_group, .in_ref_col, variables = list(strata = NULL), - method = c("chisq", "schouten", "fisher", "cmh")) { + method = c("chisq", "schouten", "fisher", "cmh"), + ...) { method <- match.arg(method) - y <- list(pval = "") + y <- list(pval = character()) if (!.in_ref_col) { assert_df_with_variables(df, list(rsp = .var)) @@ -103,11 +104,74 @@ d_test_proportion_diff <- function(method) { #' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()]. #' #' @keywords internal -a_test_proportion_diff <- make_afun( - s_test_proportion_diff, - .formats = c(pval = "x.xxxx | (<0.0001)"), - .indent_mods = c(pval = 1L) -) +a_test_proportion_diff <- function(df, + ..., + .stats = NULL, + .stat_names = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL) { + dots_extra_args <- list(...) + + # Check if there are user-defined functions + default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) + .stats <- default_and_custom_stats_list$default_stats + custom_stat_functions <- default_and_custom_stats_list$custom_stats + + # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) + extra_afun_params <- retrieve_extra_afun_params( + names(dots_extra_args$.additional_fun_parameters) + ) + dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore + + # Main statistical functions application + x_stats <- .apply_stat_functions( + default_stat_fnc = s_test_proportion_diff, + custom_stat_fnc_list = custom_stat_functions, + args_list = c( + df = list(df), + extra_afun_params, + dots_extra_args + ) + ) + + # Fill in with stats defaults if needed + .stats <- c( + get_stats("test_proportion_diff", stats_in = .stats), + names(custom_stat_functions) + ) + + x_stats <- x_stats[.stats] + + # Fill in formats/indents/labels with custom input and defaults + .formats <- get_formats_from_stats(.stats, .formats) + .indent_mods <- get_indents_from_stats(.stats, .indent_mods) + if (is.null(.labels)) { + .labels <- sapply(x_stats, attr, "label") + } + .labels <- get_labels_from_stats(.stats, .labels) + + # Auto format handling + .formats <- apply_auto_formatting( + .formats, + x_stats, + extra_afun_params$.df_row, + extra_afun_params$.var + ) + + # Get and check statistical names from defaults + .stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats + .stat_names <- paste0(.stat_names, "_", dots_extra_args$method) + + in_rows( + .list = x_stats, + .formats = .formats, + .names = names(.labels), + .stat_names = .stat_names, + .labels = .labels %>% .unlist_keep_nulls(), + .indent_mods = .indent_mods %>% .unlist_keep_nulls() + ) +} #' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments #' and additional format arguments. This function is a wrapper for [rtables::analyze()]. @@ -138,37 +202,59 @@ a_test_proportion_diff <- make_afun( #' @order 2 test_proportion_diff <- function(lyt, vars, - variables = list(strata = NULL), - method = c("chisq", "schouten", "fisher", "cmh"), + var_labels = vars, na_str = default_na_str(), nested = TRUE, - ..., - var_labels = vars, show_labels = "hidden", table_names = vars, - .stats = NULL, - .formats = NULL, + section_div = NA_character_, + ..., + na_rm = TRUE, + variables = list(strata = NULL), + # conf_level = 0.95, + method = c("chisq", "schouten", "fisher", "cmh"), + .stats = c("pval"), + # .stats = c("diff", "diff_ci"), + .stat_names = NULL, + .formats = c(pval = "x.xxxx | (<0.0001)"), .labels = NULL, - .indent_mods = NULL) { - extra_args <- list(variables = variables, method = method, ...) + .indent_mods = c(pval = 1L)) { + # Depending on main functions + extra_args <- list( + "na_rm" = na_rm, + "variables" = variables, + # "conf_level" = conf_level, + "method" = method, + ... + ) - afun <- make_afun( - a_test_proportion_diff, - .stats = .stats, - .formats = .formats, - .labels = .labels, - .indent_mods = .indent_mods + # Needed defaults + if (!is.null(.stats)) extra_args[[".stats"]] <- .stats + if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names + if (!is.null(.formats)) extra_args[[".formats"]] <- .formats + if (!is.null(.labels)) extra_args[[".labels"]] <- .labels + if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods + + # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) + extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) + formals(a_test_proportion_diff) <- c( + formals(a_test_proportion_diff), + extra_args[[".additional_fun_parameters"]] ) + + # Main {rtables} structural call analyze( - lyt, - vars, - afun = afun, + lyt = lyt, + vars = vars, var_labels = var_labels, + afun = a_test_proportion_diff, na_str = na_str, + inclNAs = !na_rm, nested = nested, extra_args = extra_args, show_labels = show_labels, - table_names = table_names + table_names = table_names, + section_div = section_div ) } From 2fd1adbb871539d2f36ff1be17ec95843d7e8b8a Mon Sep 17 00:00:00 2001 From: Melkiades Date: Mon, 17 Feb 2025 12:48:17 +0100 Subject: [PATCH 06/12] riskdiff is gonna be difficult --- R/incidence_rate.R | 4 +- R/prop_diff.R | 1 + R/riskdiff.R | 27 +- R/summarize_num_patients.R | 230 +++++++++++++----- .../testthat/_snaps/summarize_num_patients.md | 64 ----- 5 files changed, 184 insertions(+), 142 deletions(-) diff --git a/R/incidence_rate.R b/R/incidence_rate.R index c517c35e8c..383fcd7784 100644 --- a/R/incidence_rate.R +++ b/R/incidence_rate.R @@ -155,10 +155,10 @@ a_incidence_rate <- function(df, # Fill in with defaults formats_def <- formals()$.formats %>% eval() .formats <- c(.formats, formats_def)[!duplicated(names(c(.formats, formats_def)))] - labels_def <- sapply(x_stats, \(x) attributes(x)$label) + labels_def <- sapply(x_stats, function(x) attributes(x)$label) .labels <- c(.labels, labels_def)[!duplicated(names(c(.labels, labels_def)))] if (nzchar(labelstr) > 0) { - .labels <- sapply(.labels, \(x) gsub("%.labels", x, gsub("%s", labelstr, label_fmt))) + .labels <- sapply(.labels, function(x) gsub("%.labels", x, gsub("%s", labelstr, label_fmt))) } # Fill in with formatting defaults if needed diff --git a/R/prop_diff.R b/R/prop_diff.R index 68c9e4096b..e5d045b707 100644 --- a/R/prop_diff.R +++ b/R/prop_diff.R @@ -205,6 +205,7 @@ a_proportion_diff <- function(df, .indent_mods <- get_indents_from_stats(.stats, .indent_mods) if (is.null(.labels)) { .labels <- sapply(x_stats, attr, "label") + .labels <- .labels[nzchar(.labels)] } .labels <- get_labels_from_stats(.stats, .labels) diff --git a/R/riskdiff.R b/R/riskdiff.R index 0bea490e30..403870851e 100644 --- a/R/riskdiff.R +++ b/R/riskdiff.R @@ -85,19 +85,9 @@ add_riskdiff <- function(arm_x, #' @keywords internal afun_riskdiff <- function(df, labelstr = "", - .var, - .N_col, # nolint - .N_row, # nolint - .df_row, - .spl_context, - .all_col_counts, - .stats, - .formats = NULL, - .labels = NULL, - .indent_mods = NULL, - na_str = default_na_str(), afun, - s_args = list()) { + s_args = list(), + ...) { if (!any(grepl("riskdiff", names(.spl_context)))) { stop( "Please set up levels to use in risk difference calculations using the `add_riskdiff` ", @@ -106,9 +96,10 @@ afun_riskdiff <- function(df, } checkmate::assert_list(afun, len = 1, types = "function") checkmate::assert_named(afun) - afun_args <- list( - .var = .var, .df_row = .df_row, .N_row = .N_row, denom = "N_col", labelstr = labelstr, - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str + browser() + afun_args <- c( + .var = .var, list(.df_row = .df_row), .N_row = .N_row, denom = "N_col", labelstr = labelstr, + s_args ) afun_args <- afun_args[intersect(names(afun_args), names(as.list(args(afun[[1]]))))] if ("denom" %in% names(s_args)) afun_args[["denom"]] <- NULL @@ -116,7 +107,7 @@ afun_riskdiff <- function(df, cur_split <- tail(.spl_context$cur_col_split_val[[1]], 1) if (!grepl("^riskdiff", cur_split)) { # Apply basic afun (no risk difference) in all other columns - do.call(afun[[1]], args = c(list(df = df, .N_col = .N_col), afun_args, s_args)) + do.call(afun[[1]], args = c(list(df = df, .var = .var, .N_col = .N_col, .spl_context = .spl_context), afun_args)) } else { arm_x <- strsplit(cur_split, "_")[[1]][2] arm_y <- strsplit(cur_split, "_")[[1]][3] @@ -156,9 +147,9 @@ afun_riskdiff <- function(df, N_col_x, N_col_y, list_names = var_nms, pct = pct - ), max(1, length(.stats))) + ), max(1, length(s_args$.stats))) - in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = .indent_mods) + in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = s_args$.indent_mods) } } diff --git a/R/summarize_num_patients.R b/R/summarize_num_patients.R index e9b8bdefc8..1702af9d67 100644 --- a/R/summarize_num_patients.R +++ b/R/summarize_num_patients.R @@ -50,8 +50,7 @@ NULL #' ) #' #' @export -s_num_patients <- function(x, labelstr, .N_col, count_by = NULL, unique_count_suffix = TRUE) { # nolint - +s_num_patients <- function(x, labelstr = "", .N_col, count_by = NULL, unique_count_suffix = TRUE, ...) { checkmate::assert_string(labelstr) checkmate::assert_count(.N_col) checkmate::assert_multi_class(x, classes = c("factor", "character")) @@ -105,7 +104,8 @@ s_num_patients_content <- function(df, .var, required = NULL, count_by = NULL, - unique_count_suffix = TRUE) { + unique_count_suffix = TRUE, + ...) { checkmate::assert_string(.var) checkmate::assert_data_frame(df) if (is.null(count_by)) { @@ -119,23 +119,103 @@ s_num_patients_content <- function(df, df <- df[!is.na(df[[required]]), , drop = FALSE] } - x <- df[[.var]] y <- if (is.null(count_by)) NULL else df[[count_by]] s_num_patients( - x = x, + x = df[[.var]], labelstr = labelstr, .N_col = .N_col, count_by = y, - unique_count_suffix = unique_count_suffix + unique_count_suffix = unique_count_suffix, + ... ) } -c_num_patients <- make_afun( - s_num_patients_content, - .stats = c("unique", "nonunique", "unique_count"), - .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = "xx") -) +#' @keywords internal +a_num_patients <- function(df, + labelstr = "", + ..., + .stats = NULL, + .stat_names = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL) { + dots_extra_args <- list(...) + + # Check if there are user-defined functions + default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) + .stats <- default_and_custom_stats_list$default_stats + custom_stat_functions <- default_and_custom_stats_list$custom_stats + + # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) + extra_afun_params <- retrieve_extra_afun_params( + names(dots_extra_args$.additional_fun_parameters) + ) + dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore + + # Main statistical functions application + if (isTRUE(dots_extra_args$is_summary_content)) { + x_stats <- .apply_stat_functions( + default_stat_fnc = s_num_patients_content, + custom_stat_fnc_list = custom_stat_functions, + args_list = c( + df = list(df), + extra_afun_params, + dots_extra_args + ) + ) + } else { + x_stats <- .apply_stat_functions( + default_stat_fnc = s_num_patients, + custom_stat_fnc_list = custom_stat_functions, + args_list = c( + x = list(df[[extra_afun_params$.var]]), + extra_afun_params, + dots_extra_args + ) + ) + } + + # Fill in with stats defaults if needed + .stats <- c( + get_stats("summarize_num_patients", stats_in = .stats), + names(custom_stat_functions) + ) + + x_stats <- x_stats[.stats] + + # Fill in formats/indents/labels with custom input and defaults + .formats <- get_formats_from_stats(.stats, .formats) + .indent_mods <- get_indents_from_stats(.stats, .indent_mods) + if (anyNA(.labels[names(x_stats)])) { + .labels <- setNames(.labels[names(x_stats)], names(x_stats)) + attr_labels <- sapply(x_stats, attr, "label") + attr_labels <- attr_labels[nzchar(attr_labels)] + .labels[names(.labels) %in% names(attr_labels) & is.na(.labels)] <- attr_labels + .labels <- .labels[!is.na(.labels)] + } + .labels <- get_labels_from_stats(.stats, .labels) + + # Auto format handling + .formats <- apply_auto_formatting( + .formats, + x_stats, + extra_afun_params$.df_row, + extra_afun_params$.var + ) + + # Get and check statistical names from defaults + .stat_names <- get_stat_names(x_stats, .stat_names) # note is x_stats + + in_rows( + .list = x_stats, + .formats = .formats, + .names = names(.labels), + .stat_names = .stat_names, + .labels = .labels %>% .unlist_keep_nulls(), + .indent_mods = .indent_mods %>% .unlist_keep_nulls() + ) +} #' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments #' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()]. @@ -159,48 +239,62 @@ c_num_patients <- make_afun( #' @order 3 summarize_num_patients <- function(lyt, var, + na_str = default_na_str(), + riskdiff = FALSE, + ..., + na_rm = TRUE, required = NULL, count_by = NULL, unique_count_suffix = TRUE, - na_str = default_na_str(), - .stats = NULL, - .formats = NULL, + .stats = c("unique", "nonunique", "unique_count"), + .stat_names = NULL, + .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = "xx"), .labels = c( unique = "Number of patients with at least one event", nonunique = "Number of events" ), - .indent_mods = 0L, - riskdiff = FALSE, - ...) { + .indent_mods = 0L) { checkmate::assert_flag(riskdiff) - if (is.null(.stats)) .stats <- c("unique", "nonunique", "unique_count") - if (length(.labels) > length(.stats)) .labels <- .labels[names(.labels) %in% .stats] - - s_args <- list(required = required, count_by = count_by, unique_count_suffix = unique_count_suffix, ...) - - cfun <- make_afun( - c_num_patients, - .stats = .stats, - .formats = .formats, - .labels = .labels + # Depending on main functions + extra_args <- list( + "na_rm" = na_rm, + "required" = required, + "count_by" = count_by, + "unique_count_suffix" = unique_count_suffix, + "is_summary_content" = TRUE, # flag for analysis function + ... ) + # Needed defaults + if (!is.null(.stats)) extra_args[[".stats"]] <- .stats + if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names + if (!is.null(.formats)) extra_args[[".formats"]] <- .formats + if (!is.null(.labels)) extra_args[[".labels"]] <- .labels + if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods + + # Riskdiff directive + cfun <- ifelse(isFALSE(riskdiff), a_num_patients, afun_riskdiff) extra_args <- if (isFALSE(riskdiff)) { - s_args + extra_args } else { list( - afun = list("s_num_patients_content" = cfun), - .stats = .stats, - .indent_mods = .indent_mods, - s_args = s_args + afun = list("s_num_patients_content" = a_num_patients), + s_args = extra_args ) } + # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) + extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) + formals(cfun) <- c( + formals(cfun), + extra_args[[".additional_fun_parameters"]] + ) + summarize_row_groups( lyt = lyt, var = var, - cfun = ifelse(isFALSE(riskdiff), cfun, afun_riskdiff), + cfun = cfun, na_str = na_str, extra_args = extra_args, indent_mod = .indent_mods @@ -245,54 +339,74 @@ summarize_num_patients <- function(lyt, #' @order 2 analyze_num_patients <- function(lyt, vars, + var_labels = vars, + riskdiff = FALSE, + na_str = default_na_str(), + nested = TRUE, + table_names = vars, + show_labels = c("default", "visible", "hidden"), + section_div = NA_character_, + ..., + na_rm = TRUE, required = NULL, count_by = NULL, unique_count_suffix = TRUE, - na_str = default_na_str(), - nested = TRUE, - .stats = NULL, - .formats = NULL, + .stats = c("unique", "nonunique", "unique_count"), + .stat_names = NULL, + .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = "xx"), .labels = c( unique = "Number of patients with at least one event", nonunique = "Number of events" ), - show_labels = c("default", "visible", "hidden"), - .indent_mods = 0L, - riskdiff = FALSE, - ...) { + .indent_mods = 0L) { checkmate::assert_flag(riskdiff) - if (is.null(.stats)) .stats <- c("unique", "nonunique", "unique_count") - if (length(.labels) > length(.stats)) .labels <- .labels[names(.labels) %in% .stats] - - s_args <- list(required = required, count_by = count_by, unique_count_suffix = unique_count_suffix, ...) - - afun <- make_afun( - c_num_patients, - .stats = .stats, - .formats = .formats, - .labels = .labels + # Depending on main functions + extra_args <- list( + "na_rm" = na_rm, + "required" = required, + "count_by" = count_by, + "unique_count_suffix" = unique_count_suffix, + ... ) + # Needed defaults + if (!is.null(.stats)) extra_args[[".stats"]] <- .stats + if (!is.null(.stat_names)) extra_args[[".stat_names"]] <- .stat_names + if (!is.null(.formats)) extra_args[[".formats"]] <- .formats + if (!is.null(.labels)) extra_args[[".labels"]] <- .labels + if (!is.null(.indent_mods)) extra_args[[".indent_mods"]] <- .indent_mods + + # Riskdiff directive + afun <- ifelse(isFALSE(riskdiff), a_num_patients, afun_riskdiff) extra_args <- if (isFALSE(riskdiff)) { - s_args + extra_args } else { list( - afun = list("s_num_patients_content" = afun), - .stats = .stats, - .indent_mods = .indent_mods, - s_args = s_args + afun = list("s_num_patients_content" = a_num_patients), + s_args = extra_args ) } + # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) + extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) + formals(afun) <- c( + formals(afun), + extra_args[[".additional_fun_parameters"]] + ) + + # Main {rtables} structural call analyze( - afun = ifelse(isFALSE(riskdiff), afun, afun_riskdiff), lyt = lyt, vars = vars, + var_labels = var_labels, + afun = afun, na_str = na_str, + inclNAs = !na_rm, nested = nested, extra_args = extra_args, show_labels = show_labels, - indent_mod = .indent_mods + table_names = table_names, + section_div = section_div ) } diff --git a/tests/testthat/_snaps/summarize_num_patients.md b/tests/testthat/_snaps/summarize_num_patients.md index fb4b2d2276..b8626d194e 100644 --- a/tests/testthat/_snaps/summarize_num_patients.md +++ b/tests/testthat/_snaps/summarize_num_patients.md @@ -327,67 +327,3 @@ 17 0 1 (25.0%) 1 (11.1%) 15 1 (20.0%) 0 1 (11.1%) -# summarize_num_patients works as expected with risk difference column - - Code - res - Output - A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) - (N=202) (N=177) (N=162) (N=379) - —————————————————————————————————————————————————————————————————————————————————————————————————————————————————————— - cl D - Number of patients with at least one event 40 (19.8%) 40 (22.6%) 29 (17.9%) -2.8 (-11.1 - 5.5) - cl C - Number of patients with at least one event 31 (15.3%) 23 (13.0%) 25 (15.4%) 2.4 (-4.7 - 9.4) - cl B - Number of patients with at least one event 39 (19.3%) 36 (20.3%) 31 (19.1%) -1.0 (-9.1 - 7.0) - cl A - Number of patients with at least one event 31 (15.3%) 24 (13.6%) 27 (16.7%) 1.8 (-5.3 - 8.9) - ---- - - Code - res - Output - A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) - (N=202) (N=177) (N=162) (N=379) - —————————————————————————————————————————————————————————————————————————————————————————————————————————————————————— - cl D - Number of patients with at least one event 40 (19.8%) 40 (22.6%) 29 (17.9%) -2.8 (-11.1 - 5.5) - Number of events 66 57 43 -2.8 (-11.1 - 5.5) - cl D (n) 40 40 29 -2.8 (-11.1 - 5.5) - cl C - Number of patients with at least one event 31 (15.3%) 23 (13.0%) 25 (15.4%) 2.4 (-4.7 - 9.4) - Number of events 38 30 33 2.4 (-4.7 - 9.4) - cl C (n) 31 23 25 2.4 (-4.7 - 9.4) - cl B - Number of patients with at least one event 39 (19.3%) 36 (20.3%) 31 (19.1%) -1.0 (-9.1 - 7.0) - Number of events 59 57 51 -1.0 (-9.1 - 7.0) - cl B (n) 39 36 31 -1.0 (-9.1 - 7.0) - cl A - Number of patients with at least one event 31 (15.3%) 24 (13.6%) 27 (16.7%) 1.8 (-5.3 - 8.9) - Number of events 39 33 35 1.8 (-5.3 - 8.9) - cl A (n) 31 24 27 1.8 (-5.3 - 8.9) - -# analyze_num_patients works as expected with risk difference column - - Code - res - Output - A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) - (N=202) (N=177) (N=162) (N=379) - ————————————————————————————————————————————————————————————————————————————————— - Any SAE 59 (29.2%) 57 (32.2%) 48 (29.6%) -3.0 (-12.3 - 6.3) - ---- - - Code - res - Output - A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) - (N=202) (N=177) (N=162) (N=379) - ————————————————————————————————————————————————————————————————————————————————— - Any SAE 59 (29.2%) 57 (32.2%) 48 (29.6%) -3.0 (-12.3 - 6.3) - 202 177 162 -3.0 (-12.3 - 6.3) - (n) 59 57 48 -3.0 (-12.3 - 6.3) - From a527bd212920601ac5fe75fae3cbe9c5cb10eeff Mon Sep 17 00:00:00 2001 From: Melkiades Date: Wed, 19 Feb 2025 11:14:54 +0100 Subject: [PATCH 07/12] fix order --- R/analyze_variables.R | 13 ++++++------- R/utils_default_stats_formats_labels.R | 18 +++++++++++++----- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 2c888713ea..9f1612a42c 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -549,7 +549,7 @@ a_summary <- function(x, # Check if there are user-defined functions default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) - .stats <- default_and_custom_stats_list$default_stats + .stats <- default_and_custom_stats_list$all_stats # just the labels of stats custom_stat_functions <- default_and_custom_stats_list$custom_stats # Correction of the pval indication if it is numeric or counts @@ -588,12 +588,11 @@ a_summary <- function(x, # Fill in with stats defaults if needed met_grp <- paste0(c("analyze_vars", type), collapse = "_") - .stats <- c( - get_stats(met_grp, - stats_in = .stats, - add_pval = dots_extra_args$compare_with_ref_group %||% FALSE - ), - names(custom_stat_functions) # Additional stats from custom functions + .stats <- get_stats( + met_grp, + stats_in = .stats, + add_pval = dots_extra_args$compare_with_ref_group %||% FALSE, + custom_stat_in = names(custom_stat_functions) ) x_stats <- x_stats[.stats] diff --git a/R/utils_default_stats_formats_labels.R b/R/utils_default_stats_formats_labels.R index 84dba5b9ac..4ae073f4d4 100644 --- a/R/utils_default_stats_formats_labels.R +++ b/R/utils_default_stats_formats_labels.R @@ -33,7 +33,9 @@ NULL #' @param method_groups (`character`)\cr indicates the statistical method group (`tern` analyze function) #' to retrieve default statistics for. A character vector can be used to specify more than one statistical #' method group. -#' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. +#' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. If custom statistical +#' functions are used, `stats_in` needs to have them in too. +#' @param custom_stat_in (`character`)\cr custom statistics to add to the default statistics. #' @param add_pval (`flag`)\cr should `"pval"` (or `"pval_counts"` if `method_groups` contains #' `"analyze_vars_counts"`) be added to the statistical methods? #' @@ -57,7 +59,8 @@ NULL #' get_stats(c("count_occurrences", "analyze_vars_counts")) #' #' @export -get_stats <- function(method_groups = "analyze_vars_numeric", stats_in = NULL, add_pval = FALSE) { +get_stats <- function(method_groups = "analyze_vars_numeric", + stats_in = NULL, custom_stat_in = NULL, add_pval = FALSE) { checkmate::assert_character(method_groups) checkmate::assert_character(stats_in, null.ok = TRUE) checkmate::assert_flag(add_pval) @@ -82,6 +85,9 @@ get_stats <- function(method_groups = "analyze_vars_numeric", stats_in = NULL, a out <- unique(c(out, out_tmp)) } + # Add custom stats + out <- c(out, custom_stat_in) + # If you added pval to the stats_in you certainly want it if (!is.null(stats_in) && any(grepl("^pval", stats_in))) { stats_in_pval_value <- stats_in[grepl("^pval", stats_in)] @@ -172,16 +178,18 @@ get_stat_names <- function(stat_results, stat_names_in = NULL) { # Utility function used to separate custom stats (user-defined functions) from defaults .split_std_from_custom_stats <- function(stats_in) { - out <- list(default_stats = NULL, custom_stats = NULL) + out <- list(default_stats = NULL, custom_stats = NULL, all_stats = NULL) if (is.list(stats_in)) { is_custom_fnc <- sapply(stats_in, is.function) checkmate::assert_list(stats_in[is_custom_fnc], types = "function", names = "named") out[["custom_stats"]] <- stats_in[is_custom_fnc] out[["default_stats"]] <- unlist(stats_in[!is_custom_fnc]) + all_stats <- names(stats_in) # to keep the order + all_stats[!is_custom_fnc] <- out[["default_stats"]] + out[["all_stats"]] <- all_stats } else { - out[["default_stats"]] <- stats_in + out[["default_stats"]] <- out[["all_stats"]] <- stats_in } - out } From 51eed0a9cef3b97fab2d94be09ad43fe6984870d Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 20 Feb 2025 15:11:17 +0100 Subject: [PATCH 08/12] update before changing PR --- R/analyze_variables.R | 4 ++-- R/count_values.R | 7 ++++--- R/utils_default_stats_formats_labels.R | 5 +++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/R/analyze_variables.R b/R/analyze_variables.R index 9f1612a42c..f1033189cf 100644 --- a/R/analyze_variables.R +++ b/R/analyze_variables.R @@ -591,8 +591,8 @@ a_summary <- function(x, .stats <- get_stats( met_grp, stats_in = .stats, - add_pval = dots_extra_args$compare_with_ref_group %||% FALSE, - custom_stat_in = names(custom_stat_functions) + custom_stat_in = names(custom_stat_functions), + add_pval = dots_extra_args$compare_with_ref_group %||% FALSE ) x_stats <- x_stats[.stats] diff --git a/R/count_values.R b/R/count_values.R index 0a74b60b7a..c9de125203 100644 --- a/R/count_values.R +++ b/R/count_values.R @@ -118,7 +118,7 @@ a_count_values <- function(x, # Check for user-defined functions default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) - .stats <- default_and_custom_stats_list$default_stats + .stats <- default_and_custom_stats_list$all_stats custom_stat_functions <- default_and_custom_stats_list$custom_stats # Add extra parameters to the s_* function @@ -139,8 +139,9 @@ a_count_values <- function(x, ) # Fill in formatting defaults - .stats <- c( - get_stats("analyze_vars_counts", stats_in = .stats), + .stats <- get_stats("analyze_vars_counts", + stats_in = .stats, + custom_stats_in = names(custom_stat_functions)), names(custom_stat_functions) # Additional stats from custom functions ) .formats <- get_formats_from_stats(.stats, .formats) diff --git a/R/utils_default_stats_formats_labels.R b/R/utils_default_stats_formats_labels.R index 4ae073f4d4..dc3814a79a 100644 --- a/R/utils_default_stats_formats_labels.R +++ b/R/utils_default_stats_formats_labels.R @@ -35,7 +35,7 @@ NULL #' method group. #' @param stats_in (`character`)\cr statistics to retrieve for the selected method group. If custom statistical #' functions are used, `stats_in` needs to have them in too. -#' @param custom_stat_in (`character`)\cr custom statistics to add to the default statistics. +#' @param custom_stats_in (`character`)\cr custom statistics to add to the default statistics. #' @param add_pval (`flag`)\cr should `"pval"` (or `"pval_counts"` if `method_groups` contains #' `"analyze_vars_counts"`) be added to the statistical methods? #' @@ -60,9 +60,10 @@ NULL #' #' @export get_stats <- function(method_groups = "analyze_vars_numeric", - stats_in = NULL, custom_stat_in = NULL, add_pval = FALSE) { + stats_in = NULL, custom_stats_in = NULL, add_pval = FALSE) { checkmate::assert_character(method_groups) checkmate::assert_character(stats_in, null.ok = TRUE) + checkmate::assert_character(custom_stats_in, null.ok = TRUE) checkmate::assert_flag(add_pval) # Default is still numeric From 647a3c75925d05ad9ee928c70735799e0d113618 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Wed, 26 Feb 2025 21:36:31 +0100 Subject: [PATCH 09/12] refactoring riskdiff - still some fix to do --- R/riskdiff.R | 172 ++++++++++++++---- R/summarize_num_patients.R | 58 +++--- .../testthat/_snaps/summarize_num_patients.md | 27 +++ tests/testthat/test-summarize_num_patients.R | 15 +- 4 files changed, 201 insertions(+), 71 deletions(-) diff --git a/R/riskdiff.R b/R/riskdiff.R index 403870851e..71e9d68427 100644 --- a/R/riskdiff.R +++ b/R/riskdiff.R @@ -85,29 +85,71 @@ add_riskdiff <- function(arm_x, #' @keywords internal afun_riskdiff <- function(df, labelstr = "", - afun, - s_args = list(), - ...) { + ..., + .stats = NULL, + .stat_names = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL) { + dots_extra_args <- list(...) + + # Check if there are user-defined functions + default_and_custom_stats_list <- .split_std_from_custom_stats(.stats) + .stats <- default_and_custom_stats_list$default_stats + custom_stat_functions <- default_and_custom_stats_list$custom_stats + + # Adding automatically extra parameters to the statistic function (see ?rtables::additional_fun_params) + extra_afun_params <- retrieve_extra_afun_params( + names(dots_extra_args$.additional_fun_parameters) + ) + dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore + + .spl_context <- extra_afun_params[[".spl_context"]] + .N_col <- extra_afun_params[[".N_col"]] + .all_col_counts <- extra_afun_params[[".all_col_counts"]] + + # Checking if the user has set up the levels to use in risk difference calculations if (!any(grepl("riskdiff", names(.spl_context)))) { stop( "Please set up levels to use in risk difference calculations using the `add_riskdiff` ", "split function within `split_cols_by`. See ?add_riskdiff for details." ) } - checkmate::assert_list(afun, len = 1, types = "function") - checkmate::assert_named(afun) - browser() - afun_args <- c( - .var = .var, list(.df_row = .df_row), .N_row = .N_row, denom = "N_col", labelstr = labelstr, - s_args + + # Is this a summary content row? (label row with data summary) + isc <- isTRUE(dots_extra_args$is_summary_content) + args_list <- c( + if(isc) { + list(df = df) + } else { + list(x = df[[extra_afun_params$.var]]) + }, + extra_afun_params, + dots_extra_args ) - afun_args <- afun_args[intersect(names(afun_args), names(as.list(args(afun[[1]]))))] - if ("denom" %in% names(s_args)) afun_args[["denom"]] <- NULL + + dots_extra_args[["denom"]] <- NULL cur_split <- tail(.spl_context$cur_col_split_val[[1]], 1) if (!grepl("^riskdiff", cur_split)) { # Apply basic afun (no risk difference) in all other columns - do.call(afun[[1]], args = c(list(df = df, .var = .var, .N_col = .N_col, .spl_context = .spl_context), afun_args)) + x_stats <- .apply_stat_functions( + default_stat_fnc = if (isc) { + s_num_patients_content + } else { + s_num_patients + }, + custom_stat_fnc_list = custom_stat_functions, + args_list = args_list + ) + + # Fill in with stats defaults if needed + .stats <- c( + get_stats("summarize_num_patients", stats_in = .stats), + names(custom_stat_functions) + ) + + out_list <- x_stats[.stats] } else { arm_x <- strsplit(cur_split, "_")[[1]][2] arm_y <- strsplit(cur_split, "_")[[1]][3] @@ -124,33 +166,99 @@ afun_riskdiff <- function(df, cur_var <- tail(.spl_context$cur_col_split[[1]], 1) # Apply statistics function to arm X and arm Y data - s_args <- c(s_args, afun_args[intersect(names(afun_args), names(as.list(args(names(afun)))))]) - s_x <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_x, ], .N_col = N_col_x), s_args)) - s_y <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_y, ], .N_col = N_col_y), s_args)) - - # Get statistic name and row names - stat <- ifelse("count_fraction" %in% names(s_x), "count_fraction", "unique") - if ("flag_variables" %in% names(s_args)) { - var_nms <- s_args$flag_variables - } else if (is.list(s_x[[stat]]) && !is.null(names(s_x[[stat]]))) { - var_nms <- names(s_x[[stat]]) + args_list[["x"]] <- NULL # It does not matter? + if (!("df" %in% names(args_list))) { + args_list <- c(list("df" = NULL), args_list) + } + args_list[["df"]] <- df[df[[cur_var]] == arm_x, ] + extra_afun_params[[".N_col"]] <- N_col_x + x_stats <- .apply_stat_functions( + default_stat_fnc = s_num_patients_content, # why content? + custom_stat_fnc_list = custom_stat_functions, + args_list = args_list + ) + extra_afun_params[[".N_col"]] <- N_col_y + args_list[["df"]] <- df[df[[cur_var]] == arm_y, ] + y_stats <- .apply_stat_functions( + default_stat_fnc = s_num_patients_content, # why content? + custom_stat_fnc_list = custom_stat_functions, + args_list = args_list + ) + + # Fill in with stats defaults if needed + .stats <- c( + get_stats("summarize_num_patients", stats_in = .stats), + names(custom_stat_functions) + ) + + # Forced types for risk differences + if (!any(names(x_stats) %in% c("count_fraction", "unique"))) { + stop("Risk difference calculations are supported only for count_fraction or unique statistics.") + } + .stats <- ifelse("count_fraction" %in% names(.stats), "count_fraction", "unique") + x_stats <- x_stats[.stats] + y_stats <- y_stats[.stats] + if ("flag_variables" %in% names(dots_extra_args)) { + var_nms <- dots_extra_args$flag_variables + } else if (is.list(x_stats) && !is.null(names(x_stats))) { + var_nms <- names(x_stats) } else { var_nms <- "" - s_x[[stat]] <- list(s_x[[stat]]) - s_y[[stat]] <- list(s_y[[stat]]) + x_stats <- list(x_stats) + y_stats <- list(y_stats) } # Calculate risk difference for each row, repeated if multiple statistics in table pct <- tail(strsplit(cur_split, "_")[[1]], 1) == "pct" - rd_ci <- rep(stat_propdiff_ci( - lapply(s_x[[stat]], `[`, 1), lapply(s_y[[stat]], `[`, 1), - N_col_x, N_col_y, - list_names = var_nms, - pct = pct - ), max(1, length(s_args$.stats))) - - in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = s_args$.indent_mods) + + x_first_value <- lapply(x_stats, `[`, 1) + y_second_value <- lapply(y_stats, `[`, 1) + out_list <- sapply(seq(.stats), function(stat_i) { + stat_propdiff_ci( + x_first_value[stat_i], y_second_value[stat_i], + N_col_x, N_col_y, + list_names = var_nms, + pct = pct + ) + }) + + # It feels an imposition but here it is (TO ADD risk_diff_unique, etc) + .formats <- lapply(out_list, function(x) "xx.x (xx.x - xx.x)") + # in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = .indent_mods) } + + + # Fill in formats/indents/labels with custom input and defaults + .formats <- get_formats_from_stats(.stats, .formats) + .indent_mods <- get_indents_from_stats(.stats, .indent_mods) + if (anyNA(.labels[names(out_list)])) { + .labels <- setNames(.labels[names(out_list)], names(out_list)) + attr_labels <- sapply(out_list, attr, "label") + attr_labels <- attr_labels[nzchar(attr_labels)] + .labels[names(.labels) %in% names(attr_labels) & is.na(.labels)] <- attr_labels + .labels <- .labels[!is.na(.labels)] + } + .labels <- get_labels_from_stats(.stats, .labels) + + # Auto format handling + .formats <- apply_auto_formatting( + .formats, + out_list, + extra_afun_params$.df_row, + extra_afun_params$.var + ) + + # Get and check statistical names from defaults + .stat_names <- get_stat_names(out_list, .stat_names) + + in_rows( + .list = out_list, + .formats = .formats, + .names = names(.labels), + .stat_names = .stat_names, + .labels = .labels %>% .unlist_keep_nulls(), + .indent_mods = .indent_mods %>% .unlist_keep_nulls() + ) } #' Control function for risk difference column diff --git a/R/summarize_num_patients.R b/R/summarize_num_patients.R index 1702af9d67..4c558a1082 100644 --- a/R/summarize_num_patients.R +++ b/R/summarize_num_patients.R @@ -153,28 +153,28 @@ a_num_patients <- function(df, ) dots_extra_args$.additional_fun_parameters <- NULL # After extraction we do not need them anymore + # Is this a summary content row? (label row with data summary) + isc <- isTRUE(dots_extra_args$is_summary_content) + args_list <- c( + if(isc) { + list(df = df) + } else { + list(x = df[[extra_afun_params$.var]]) + }, + extra_afun_params, + dots_extra_args + ) + # Main statistical functions application - if (isTRUE(dots_extra_args$is_summary_content)) { - x_stats <- .apply_stat_functions( - default_stat_fnc = s_num_patients_content, - custom_stat_fnc_list = custom_stat_functions, - args_list = c( - df = list(df), - extra_afun_params, - dots_extra_args - ) - ) - } else { - x_stats <- .apply_stat_functions( - default_stat_fnc = s_num_patients, - custom_stat_fnc_list = custom_stat_functions, - args_list = c( - x = list(df[[extra_afun_params$.var]]), - extra_afun_params, - dots_extra_args - ) - ) - } + x_stats <- .apply_stat_functions( + default_stat_fnc = if (isc) { + s_num_patients_content + } else { + s_num_patients + }, + custom_stat_fnc_list = custom_stat_functions, + args_list = args_list + ) # Fill in with stats defaults if needed .stats <- c( @@ -275,14 +275,6 @@ summarize_num_patients <- function(lyt, # Riskdiff directive cfun <- ifelse(isFALSE(riskdiff), a_num_patients, afun_riskdiff) - extra_args <- if (isFALSE(riskdiff)) { - extra_args - } else { - list( - afun = list("s_num_patients_content" = a_num_patients), - s_args = extra_args - ) - } # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) @@ -379,14 +371,6 @@ analyze_num_patients <- function(lyt, # Riskdiff directive afun <- ifelse(isFALSE(riskdiff), a_num_patients, afun_riskdiff) - extra_args <- if (isFALSE(riskdiff)) { - extra_args - } else { - list( - afun = list("s_num_patients_content" = a_num_patients), - s_args = extra_args - ) - } # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) diff --git a/tests/testthat/_snaps/summarize_num_patients.md b/tests/testthat/_snaps/summarize_num_patients.md index b8626d194e..ebf2f38c7e 100644 --- a/tests/testthat/_snaps/summarize_num_patients.md +++ b/tests/testthat/_snaps/summarize_num_patients.md @@ -327,3 +327,30 @@ 17 0 1 (25.0%) 1 (11.1%) 15 1 (20.0%) 0 1 (11.1%) +# summarize_num_patients works as expected with risk difference column + + Code + res + Output + A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) + (N=202) (N=177) (N=162) (N=379) + —————————————————————————————————————————————————————————————————————————————————————————————————————————————————————— + cl D + Number of patients with at least one event 40 (19.8%) 40 (22.6%) 29 (17.9%) -2.8 (-11.1 - 5.5) + cl C + Number of patients with at least one event 31 (15.3%) 23 (13.0%) 25 (15.4%) 2.4 (-4.7 - 9.4) + cl B + Number of patients with at least one event 39 (19.3%) 36 (20.3%) 31 (19.1%) -1.0 (-9.1 - 7.0) + cl A + Number of patients with at least one event 31 (15.3%) 24 (13.6%) 27 (16.7%) 1.8 (-5.3 - 8.9) + +# analyze_num_patients works as expected with risk difference column + + Code + res + Output + A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) + (N=202) (N=177) (N=162) (N=379) + ————————————————————————————————————————————————————————————————————————————————— + Any SAE 59 (29.2%) 57 (32.2%) 48 (29.6%) -3.0 (-12.3 - 6.3) + diff --git a/tests/testthat/test-summarize_num_patients.R b/tests/testthat/test-summarize_num_patients.R index 3af6119549..6951426050 100644 --- a/tests/testthat/test-summarize_num_patients.R +++ b/tests/testthat/test-summarize_num_patients.R @@ -275,13 +275,24 @@ testthat::test_that("summarize_num_patients works as expected with risk differen # Multiple statistics result <- basic_table(show_colcounts = TRUE) %>% split_cols_by("ARM", split_fun = add_riskdiff("A: Drug X", "B: Placebo")) %>% - split_rows_by("AESOC", child_labels = "visible") %>% + split_rows_by("AESOC", child_labels = "visible") + + testthat::expect_error( + out <- result %>% + summarize_num_patients( + "USUBJID", + riskdiff = TRUE + ) %>% + build_table(tern_ex_adae), + "Risk difference calculations are supported only for count_fraction" + ) + result <- result %>% summarize_num_patients( "USUBJID", + .stats = c("unique", "nonunique"), riskdiff = TRUE ) %>% build_table(tern_ex_adae) - res <- testthat::expect_silent(result) testthat::expect_snapshot(res) }) From d8b562aba31f2d947164f57e20e30e66c207d8dd Mon Sep 17 00:00:00 2001 From: Melkiades Date: Fri, 28 Feb 2025 16:04:01 +0100 Subject: [PATCH 10/12] fix --- R/riskdiff.R | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/R/riskdiff.R b/R/riskdiff.R index 71e9d68427..b3947c8026 100644 --- a/R/riskdiff.R +++ b/R/riskdiff.R @@ -186,18 +186,16 @@ afun_riskdiff <- function(df, ) # Fill in with stats defaults if needed - .stats <- c( - get_stats("summarize_num_patients", stats_in = .stats), - names(custom_stat_functions) - ) + .stats <- get_stats("summarize_num_patients", stats_in = .stats, + custom_stats_in = names(custom_stat_functions)) # Forced types for risk differences if (!any(names(x_stats) %in% c("count_fraction", "unique"))) { stop("Risk difference calculations are supported only for count_fraction or unique statistics.") } - .stats <- ifelse("count_fraction" %in% names(.stats), "count_fraction", "unique") - x_stats <- x_stats[.stats] - y_stats <- y_stats[.stats] + stat_unique_or_count_fraction <- ifelse("count_fraction" %in% names(.stats), "count_fraction", "unique") + x_stats <- x_stats[stat_unique_or_count_fraction] + y_stats <- y_stats[stat_unique_or_count_fraction] if ("flag_variables" %in% names(dots_extra_args)) { var_nms <- dots_extra_args$flag_variables } else if (is.list(x_stats) && !is.null(names(x_stats))) { @@ -210,10 +208,9 @@ afun_riskdiff <- function(df, # Calculate risk difference for each row, repeated if multiple statistics in table pct <- tail(strsplit(cur_split, "_")[[1]], 1) == "pct" - x_first_value <- lapply(x_stats, `[`, 1) y_second_value <- lapply(y_stats, `[`, 1) - out_list <- sapply(seq(.stats), function(stat_i) { + out_list <- sapply(rep(1, length(seq(.stats))), function(stat_i) { stat_propdiff_ci( x_first_value[stat_i], y_second_value[stat_i], N_col_x, N_col_y, @@ -223,11 +220,12 @@ afun_riskdiff <- function(df, }) # It feels an imposition but here it is (TO ADD risk_diff_unique, etc) - .formats <- lapply(out_list, function(x) "xx.x (xx.x - xx.x)") - # in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = .indent_mods) + .formats <- setNames( + lapply(out_list, function(x) "xx.x (xx.x - xx.x)"), + .stats + ) } - # Fill in formats/indents/labels with custom input and defaults .formats <- get_formats_from_stats(.stats, .formats) .indent_mods <- get_indents_from_stats(.stats, .indent_mods) From 1515bc49a6c09abe93924944dfe011a6ff05d66f Mon Sep 17 00:00:00 2001 From: Melkiades Date: Fri, 28 Feb 2025 18:12:31 +0100 Subject: [PATCH 11/12] riskdiff changes --- R/count_occurrences.R | 40 +++++--------- R/count_occurrences_by_grade.R | 40 ++++---------- R/count_patients_with_event.R | 20 +++---- R/count_patients_with_flags.R | 17 ++---- R/riskdiff.R | 54 ++++++++++--------- R/summarize_num_patients.R | 8 +-- .../testthat/_snaps/summarize_num_patients.md | 33 ++++++++++++ tests/testthat/test-summarize_num_patients.R | 9 ---- 8 files changed, 99 insertions(+), 122 deletions(-) diff --git a/R/count_occurrences.R b/R/count_occurrences.R index bbac47fb02..177d0667d6 100644 --- a/R/count_occurrences.R +++ b/R/count_occurrences.R @@ -239,26 +239,18 @@ count_occurrences <- function(lyt, checkmate::assert_flag(riskdiff) extra_args <- list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str, + id = id, drop = drop, ... ) - s_args <- list(id = id, drop = drop, ...) - if (isFALSE(riskdiff)) { - extra_args <- c(extra_args, s_args) - } else { - extra_args <- c( - extra_args, - list( - afun = list("s_count_occurrences" = a_count_occurrences), - s_args = s_args - ) - ) - } + # Riskdiff directive + afun <- ifelse(isFALSE(riskdiff), a_count_occurrences, afun_riskdiff) + if (isTRUE(riskdiff)) extra_args[["sfun_local"]] <- s_count_occurrences analyze( lyt = lyt, vars = vars, - afun = ifelse(isFALSE(riskdiff), a_count_occurrences, afun_riskdiff), + afun = afun, var_labels = var_labels, show_labels = show_labels, table_names = table_names, @@ -303,26 +295,18 @@ summarize_occurrences <- function(lyt, checkmate::assert_flag(riskdiff) extra_args <- list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str, + id = id, drop = drop, ... ) - s_args <- list(id = id, drop = drop, ...) - if (isFALSE(riskdiff)) { - extra_args <- c(extra_args, s_args) - } else { - extra_args <- c( - extra_args, - list( - afun = list("s_count_occurrences" = a_count_occurrences), - s_args = s_args - ) - ) - } + # Riskdiff directive + cfun <- ifelse(isFALSE(riskdiff), a_count_occurrences, afun_riskdiff) + if (isTRUE(riskdiff)) extra_args[["sfun_local"]] <- s_count_occurrences summarize_row_groups( lyt = lyt, var = var, - cfun = ifelse(isFALSE(riskdiff), a_count_occurrences, afun_riskdiff), + cfun = cfun, na_str = na_str, extra_args = extra_args ) diff --git a/R/count_occurrences_by_grade.R b/R/count_occurrences_by_grade.R index 98697b77f0..bfed49344c 100644 --- a/R/count_occurrences_by_grade.R +++ b/R/count_occurrences_by_grade.R @@ -370,28 +370,18 @@ count_occurrences_by_grade <- function(lyt, .labels = NULL) { checkmate::assert_flag(riskdiff) extra_args <- list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str - ) - s_args <- list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str, id = id, grade_groups = grade_groups, remove_single = remove_single, only_grade_groups = only_grade_groups, ... ) - if (isFALSE(riskdiff)) { - extra_args <- c(extra_args, s_args) - } else { - extra_args <- c( - extra_args, - list( - afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade), - s_args = s_args - ) - ) - } + # Riskdiff directive + afun <- ifelse(isFALSE(riskdiff), a_count_occurrences_by_grade, afun_riskdiff) + if (isTRUE(riskdiff)) extra_args[["sfun_local"]] <- s_count_occurrences_by_grade analyze( lyt = lyt, vars = var, - afun = ifelse(isFALSE(riskdiff), a_count_occurrences_by_grade, afun_riskdiff), + afun = afun, var_labels = var_labels, show_labels = show_labels, table_names = table_names, @@ -446,28 +436,18 @@ summarize_occurrences_by_grade <- function(lyt, .labels = NULL) { checkmate::assert_flag(riskdiff) extra_args <- list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str - ) - s_args <- list( + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str, id = id, grade_groups = grade_groups, remove_single = remove_single, only_grade_groups = only_grade_groups, ... ) - if (isFALSE(riskdiff)) { - extra_args <- c(extra_args, s_args) - } else { - extra_args <- c( - extra_args, - list( - afun = list("s_count_occurrences_by_grade" = a_count_occurrences_by_grade), - s_args = s_args - ) - ) - } + # Riskdiff directive + cfun <- ifelse(isFALSE(riskdiff), a_count_occurrences_by_grade, afun_riskdiff) + if (isTRUE(riskdiff)) extra_args[["sfun_local"]] <- s_count_occurrences_by_grade summarize_row_groups( lyt = lyt, var = var, - cfun = ifelse(isFALSE(riskdiff), a_count_occurrences_by_grade, afun_riskdiff), + cfun = cfun, na_str = na_str, extra_args = extra_args ) diff --git a/R/count_patients_with_event.R b/R/count_patients_with_event.R index 520f09478d..7a471f35e6 100644 --- a/R/count_patients_with_event.R +++ b/R/count_patients_with_event.R @@ -200,26 +200,18 @@ count_patients_with_event <- function(lyt, .indent_mods = NULL) { checkmate::assert_flag(riskdiff) extra_args <- list( - .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str + .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str, + filters = filters, ... ) - s_args <- list(filters = filters, ...) - if (isFALSE(riskdiff)) { - extra_args <- c(extra_args, s_args) - } else { - extra_args <- c( - extra_args, - list( - afun = list("s_count_patients_with_event" = a_count_patients_with_event), - s_args = s_args - ) - ) - } + # Riskdiff directive + afun <- ifelse(isFALSE(riskdiff), a_count_patients_with_event, afun_riskdiff) + if (isTRUE(riskdiff)) extra_args[["sfun_local"]] <- s_count_patients_with_event analyze( lyt = lyt, vars = vars, - afun = ifelse(isFALSE(riskdiff), a_count_patients_with_event, afun_riskdiff), + afun = afun, show_labels = ifelse(length(vars) > 1, "visible", "hidden"), table_names = table_names, na_str = na_str, diff --git a/R/count_patients_with_flags.R b/R/count_patients_with_flags.R index 260750de01..4afe7ea466 100644 --- a/R/count_patients_with_flags.R +++ b/R/count_patients_with_flags.R @@ -226,24 +226,15 @@ count_patients_with_flags <- function(lyt, .stats = .stats, .stat_names = .stat_names, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str ) - s_args <- list(flag_variables = flag_variables, flag_labels = flag_labels, ...) - if (isFALSE(riskdiff)) { - extra_args <- c(extra_args, s_args) - } else { - extra_args <- c( - extra_args, - list( - afun = list("s_count_patients_with_flags" = a_count_patients_with_flags), - s_args = s_args - ) - ) - } + # Riskdiff directive + afun <- ifelse(isFALSE(riskdiff), a_count_patients_with_flags, afun_riskdiff) + if (isTRUE(riskdiff)) extra_args[["sfun_local"]] <- s_count_patients_with_flags analyze( lyt = lyt, vars = var, - afun = ifelse(isFALSE(riskdiff), a_count_patients_with_flags, afun_riskdiff), + afun = afun, var_labels = var_labels, show_labels = show_labels, table_names = table_names, diff --git a/R/riskdiff.R b/R/riskdiff.R index b3947c8026..8db419e262 100644 --- a/R/riskdiff.R +++ b/R/riskdiff.R @@ -68,11 +68,6 @@ add_riskdiff <- function(arm_x, #' function utilizes the [stat_propdiff_ci()] function to perform risk difference calculations. #' #' @inheritParams argument_convention -#' @param afun (named `list`)\cr a named list containing one name-value pair where the name corresponds to -#' the name of the statistics function that should be used in calculations and the value is the corresponding -#' analysis function. -#' @param s_args (named `list`)\cr additional arguments to be passed to the statistics function and analysis -#' function supplied in `afun`. #' #' @return A list of formatted [rtables::CellValue()]. #' @@ -84,6 +79,7 @@ add_riskdiff <- function(arm_x, #' #' @keywords internal afun_riskdiff <- function(df, + sfun_local, labelstr = "", ..., .stats = NULL, @@ -116,14 +112,17 @@ afun_riskdiff <- function(df, ) } - # Is this a summary content row? (label row with data summary) - isc <- isTRUE(dots_extra_args$is_summary_content) + # Is it a df or x? + df_or_x_formal <- names(formals(sfun_local)[1]) + main_arg <- if (df_or_x_formal == "df") { + list(df = df) + } else if (df_or_x_formal == "x") { + list(x = df[[extra_afun_params$.var]]) + } else { + stop("The first argument of the analysis function must be a data frame or a vector.") + } args_list <- c( - if(isc) { - list(df = df) - } else { - list(x = df[[extra_afun_params$.var]]) - }, + main_arg, extra_afun_params, dots_extra_args ) @@ -134,11 +133,7 @@ afun_riskdiff <- function(df, if (!grepl("^riskdiff", cur_split)) { # Apply basic afun (no risk difference) in all other columns x_stats <- .apply_stat_functions( - default_stat_fnc = if (isc) { - s_num_patients_content - } else { - s_num_patients - }, + default_stat_fnc = sfun_local, custom_stat_fnc_list = custom_stat_functions, args_list = args_list ) @@ -166,28 +161,37 @@ afun_riskdiff <- function(df, cur_var <- tail(.spl_context$cur_col_split[[1]], 1) # Apply statistics function to arm X and arm Y data - args_list[["x"]] <- NULL # It does not matter? if (!("df" %in% names(args_list))) { args_list <- c(list("df" = NULL), args_list) } - args_list[["df"]] <- df[df[[cur_var]] == arm_x, ] + if (df_or_x_formal == "x") { + args_list[["x"]] <- df[df[[cur_var]] == arm_x, ][[extra_afun_params$.var]] + } else { + args_list[["df"]] <- df[df[[cur_var]] == arm_x, ] + } extra_afun_params[[".N_col"]] <- N_col_x x_stats <- .apply_stat_functions( - default_stat_fnc = s_num_patients_content, # why content? + default_stat_fnc = sfun_local, # why content? custom_stat_fnc_list = custom_stat_functions, args_list = args_list ) extra_afun_params[[".N_col"]] <- N_col_y - args_list[["df"]] <- df[df[[cur_var]] == arm_y, ] + if (df_or_x_formal == "x") { + args_list[["x"]] <- df[df[[cur_var]] == arm_y, ][[extra_afun_params$.var]] + } else { + args_list[["df"]] <- df[df[[cur_var]] == arm_y, ] + } y_stats <- .apply_stat_functions( - default_stat_fnc = s_num_patients_content, # why content? + default_stat_fnc = sfun_local, # why content? custom_stat_fnc_list = custom_stat_functions, args_list = args_list ) # Fill in with stats defaults if needed - .stats <- get_stats("summarize_num_patients", stats_in = .stats, - custom_stats_in = names(custom_stat_functions)) + .stats <- get_stats("summarize_num_patients", + stats_in = .stats, + custom_stats_in = names(custom_stat_functions) + ) # Forced types for risk differences if (!any(names(x_stats) %in% c("count_fraction", "unique"))) { @@ -220,7 +224,7 @@ afun_riskdiff <- function(df, }) # It feels an imposition but here it is (TO ADD risk_diff_unique, etc) - .formats <- setNames( + .formats <- setNames( lapply(out_list, function(x) "xx.x (xx.x - xx.x)"), .stats ) diff --git a/R/summarize_num_patients.R b/R/summarize_num_patients.R index 4c558a1082..17e2417d9e 100644 --- a/R/summarize_num_patients.R +++ b/R/summarize_num_patients.R @@ -156,7 +156,7 @@ a_num_patients <- function(df, # Is this a summary content row? (label row with data summary) isc <- isTRUE(dots_extra_args$is_summary_content) args_list <- c( - if(isc) { + if (isc) { list(df = df) } else { list(x = df[[extra_afun_params$.var]]) @@ -246,7 +246,7 @@ summarize_num_patients <- function(lyt, required = NULL, count_by = NULL, unique_count_suffix = TRUE, - .stats = c("unique", "nonunique", "unique_count"), + .stats = c("unique", "nonunique", "unique_count"), .stat_names = NULL, .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = "xx"), .labels = c( @@ -275,6 +275,7 @@ summarize_num_patients <- function(lyt, # Riskdiff directive cfun <- ifelse(isFALSE(riskdiff), a_num_patients, afun_riskdiff) + if (isTRUE(riskdiff)) extra_args[["sfun_local"]] <- s_num_patients_content # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) @@ -343,7 +344,7 @@ analyze_num_patients <- function(lyt, required = NULL, count_by = NULL, unique_count_suffix = TRUE, - .stats = c("unique", "nonunique", "unique_count"), + .stats = c("unique", "nonunique", "unique_count"), .stat_names = NULL, .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = "xx"), .labels = c( @@ -371,6 +372,7 @@ analyze_num_patients <- function(lyt, # Riskdiff directive afun <- ifelse(isFALSE(riskdiff), a_num_patients, afun_riskdiff) + if (isTRUE(riskdiff)) extra_args[["sfun_local"]] <- s_num_patients # Adding all additional information from layout to analysis functions (see ?rtables::additional_fun_params) extra_args[[".additional_fun_parameters"]] <- get_additional_afun_params(add_alt_df = FALSE) diff --git a/tests/testthat/_snaps/summarize_num_patients.md b/tests/testthat/_snaps/summarize_num_patients.md index ebf2f38c7e..936a84f360 100644 --- a/tests/testthat/_snaps/summarize_num_patients.md +++ b/tests/testthat/_snaps/summarize_num_patients.md @@ -344,6 +344,27 @@ cl A Number of patients with at least one event 31 (15.3%) 24 (13.6%) 27 (16.7%) 1.8 (-5.3 - 8.9) +--- + + Code + res + Output + A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) + (N=202) (N=177) (N=162) (N=379) + —————————————————————————————————————————————————————————————————————————————————————————————————————————————————————— + cl D + Number of patients with at least one event 40 (19.8%) 40 (22.6%) 29 (17.9%) -2.8 (-11.1 - 5.5) + Number of events 66 57 43 -2.8 (-11.1 - 5.5) + cl C + Number of patients with at least one event 31 (15.3%) 23 (13.0%) 25 (15.4%) 2.4 (-4.7 - 9.4) + Number of events 38 30 33 2.4 (-4.7 - 9.4) + cl B + Number of patients with at least one event 39 (19.3%) 36 (20.3%) 31 (19.1%) -1.0 (-9.1 - 7.0) + Number of events 59 57 51 -1.0 (-9.1 - 7.0) + cl A + Number of patients with at least one event 31 (15.3%) 24 (13.6%) 27 (16.7%) 1.8 (-5.3 - 8.9) + Number of events 39 33 35 1.8 (-5.3 - 8.9) + # analyze_num_patients works as expected with risk difference column Code @@ -354,3 +375,15 @@ ————————————————————————————————————————————————————————————————————————————————— Any SAE 59 (29.2%) 57 (32.2%) 48 (29.6%) -3.0 (-12.3 - 6.3) +--- + + Code + res + Output + A: Drug X B: Placebo C: Combination Risk Difference (%) (95% CI) + (N=202) (N=177) (N=162) (N=379) + —————————————————————————————————————————————————————————————————————————————————————————— + Any SAE 59 (29.2%) 57 (32.2%) 48 (29.6%) -3.0 (-12.3 - 6.3) + Number of events 202 177 162 -3.0 (-12.3 - 6.3) + (n) 59 57 48 -3.0 (-12.3 - 6.3) + diff --git a/tests/testthat/test-summarize_num_patients.R b/tests/testthat/test-summarize_num_patients.R index 6951426050..739f32e447 100644 --- a/tests/testthat/test-summarize_num_patients.R +++ b/tests/testthat/test-summarize_num_patients.R @@ -277,15 +277,6 @@ testthat::test_that("summarize_num_patients works as expected with risk differen split_cols_by("ARM", split_fun = add_riskdiff("A: Drug X", "B: Placebo")) %>% split_rows_by("AESOC", child_labels = "visible") - testthat::expect_error( - out <- result %>% - summarize_num_patients( - "USUBJID", - riskdiff = TRUE - ) %>% - build_table(tern_ex_adae), - "Risk difference calculations are supported only for count_fraction" - ) result <- result %>% summarize_num_patients( "USUBJID", From b5484cf7fd6a27d1450275f5af499cc6a50bcc86 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Fri, 28 Feb 2025 18:50:55 +0100 Subject: [PATCH 12/12] docs --- R/riskdiff.R | 1 + man/afun_riskdiff.Rd | 47 +++++-------------- man/prop_diff_test.Rd | 55 +++++++++++++--------- man/summarize_num_patients.Rd | 86 ++++++++++++++++++++++------------- 4 files changed, 101 insertions(+), 88 deletions(-) diff --git a/R/riskdiff.R b/R/riskdiff.R index 8db419e262..65d3ac6b0c 100644 --- a/R/riskdiff.R +++ b/R/riskdiff.R @@ -68,6 +68,7 @@ add_riskdiff <- function(arm_x, #' function utilizes the [stat_propdiff_ci()] function to perform risk difference calculations. #' #' @inheritParams argument_convention +#' @param sfun_local (`function`)\cr statistics function to apply to each column. #' #' @return A list of formatted [rtables::CellValue()]. #' diff --git a/man/afun_riskdiff.Rd b/man/afun_riskdiff.Rd index 6d94682695..ea3a3e036a 100644 --- a/man/afun_riskdiff.Rd +++ b/man/afun_riskdiff.Rd @@ -6,48 +6,32 @@ \usage{ afun_riskdiff( df, + sfun_local, labelstr = "", - .var, - .N_col, - .N_row, - .df_row, - .spl_context, - .all_col_counts, - .stats, + ..., + .stats = NULL, + .stat_names = NULL, .formats = NULL, .labels = NULL, - .indent_mods = NULL, - na_str = default_na_str(), - afun, - s_args = list() + .indent_mods = NULL ) } \arguments{ \item{df}{(\code{data.frame})\cr data set containing all analysis variables.} +\item{sfun_local}{(\code{function})\cr statistics function to apply to each column.} + \item{labelstr}{(\code{string})\cr label of the level of the parent split currently being summarized (must be present as second argument in Content Row Functions). See \code{\link[rtables:summarize_row_groups]{rtables::summarize_row_groups()}} for more information.} -\item{.var}{(\code{string})\cr single variable name that is passed by \code{rtables} when requested -by a statistics function.} - -\item{.N_col}{(\code{integer(1)})\cr column-wise N (column count) for the full column being analyzed that is typically -passed by \code{rtables}.} - -\item{.N_row}{(\code{integer(1)})\cr row-wise N (row group count) for the group of observations being analyzed -(i.e. with no column-based subsetting) that is typically passed by \code{rtables}.} - -\item{.df_row}{(\code{data.frame})\cr data frame across all of the columns for the given row split.} - -\item{.spl_context}{(\code{data.frame})\cr gives information about ancestor split states -that is passed by \code{rtables}.} - -\item{.all_col_counts}{(\code{integer})\cr vector where each value represents a global count for a column. Values are -taken from \code{alt_counts_df} if specified (see \code{\link[rtables:build_table]{rtables::build_table()}}).} +\item{...}{additional arguments for the lower level functions.} \item{.stats}{(\code{character})\cr statistics to select for the table.} +\item{.stat_names}{(\code{character})\cr names of the statistics that are passed directly to name single statistics +(\code{.stats}). This option is visible when producing \code{\link[rtables:data.frame_export]{rtables::as_result_df()}} with \code{make_ard = TRUE}.} + \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more information on the \code{"auto"} setting.} @@ -55,15 +39,6 @@ information on the \code{"auto"} setting.} \item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the unmodified default behavior. Can be negative.} - -\item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} - -\item{afun}{(named \code{list})\cr a named list containing one name-value pair where the name corresponds to -the name of the statistics function that should be used in calculations and the value is the corresponding -analysis function.} - -\item{s_args}{(named \code{list})\cr additional arguments to be passed to the statistics function and analysis -function supplied in \code{afun}.} } \value{ A list of formatted \code{\link[rtables:CellValue]{rtables::CellValue()}}. diff --git a/man/prop_diff_test.Rd b/man/prop_diff_test.Rd index 5eb9264ff0..8aa9cb3f56 100644 --- a/man/prop_diff_test.Rd +++ b/man/prop_diff_test.Rd @@ -10,18 +10,21 @@ test_proportion_diff( lyt, vars, - variables = list(strata = NULL), - method = c("chisq", "schouten", "fisher", "cmh"), + var_labels = vars, na_str = default_na_str(), nested = TRUE, - ..., - var_labels = vars, show_labels = "hidden", table_names = vars, - .stats = NULL, - .formats = NULL, + section_div = NA_character_, + ..., + na_rm = TRUE, + variables = list(strata = NULL), + method = c("chisq", "schouten", "fisher", "cmh"), + .stats = c("pval"), + .stat_names = NULL, + .formats = c(pval = "x.xxxx | (<0.0001)"), .labels = NULL, - .indent_mods = NULL + .indent_mods = c(pval = 1L) ) s_test_proportion_diff( @@ -30,16 +33,18 @@ s_test_proportion_diff( .ref_group, .in_ref_col, variables = list(strata = NULL), - method = c("chisq", "schouten", "fisher", "cmh") + method = c("chisq", "schouten", "fisher", "cmh"), + ... ) a_test_proportion_diff( df, - .var, - .ref_group, - .in_ref_col, - variables = list(strata = NULL), - method = c("chisq", "schouten", "fisher", "cmh") + ..., + .stats = NULL, + .stat_names = NULL, + .formats = NULL, + .labels = NULL, + .indent_mods = NULL ) } \arguments{ @@ -47,10 +52,7 @@ a_test_proportion_diff( \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} -\item{variables}{(named \code{list} of \code{string})\cr list of additional analysis variables.} - -\item{method}{(\code{string})\cr one of \code{chisq}, \code{cmh}, \code{fisher}, or \code{schouten}; specifies the test used -to calculate the p-value.} +\item{var_labels}{(\code{character})\cr variable labels.} \item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} @@ -58,19 +60,30 @@ to calculate the p-value.} possible (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split. underneath analyses, which is not allowed.} -\item{...}{additional arguments for the lower level functions.} - -\item{var_labels}{(\code{character})\cr variable labels.} - \item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} \item{table_names}{(\code{character})\cr this can be customized in the case that the same \code{vars} are analyzed multiple times, to avoid warnings from \code{rtables}.} +\item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group +defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} + +\item{...}{additional arguments for the lower level functions.} + +\item{na_rm}{(\code{flag})\cr whether \code{NA} values should be removed from \code{x} prior to analysis.} + +\item{variables}{(named \code{list} of \code{string})\cr list of additional analysis variables.} + +\item{method}{(\code{string})\cr one of \code{chisq}, \code{cmh}, \code{fisher}, or \code{schouten}; specifies the test used +to calculate the p-value.} + \item{.stats}{(\code{character})\cr statistics to select for the table. Options are: \code{'pval'}} +\item{.stat_names}{(\code{character})\cr names of the statistics that are passed directly to name single statistics +(\code{.stats}). This option is visible when producing \code{\link[rtables:data.frame_export]{rtables::as_result_df()}} with \code{make_ard = TRUE}.} + \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more information on the \code{"auto"} setting.} diff --git a/man/summarize_num_patients.Rd b/man/summarize_num_patients.Rd index 1f0cfa1d3a..f15b929c06 100644 --- a/man/summarize_num_patients.Rd +++ b/man/summarize_num_patients.Rd @@ -10,43 +10,53 @@ analyze_num_patients( lyt, vars, + var_labels = vars, + riskdiff = FALSE, + na_str = default_na_str(), + nested = TRUE, + table_names = vars, + show_labels = c("default", "visible", "hidden"), + section_div = NA_character_, + ..., + na_rm = TRUE, required = NULL, count_by = NULL, unique_count_suffix = TRUE, - na_str = default_na_str(), - nested = TRUE, - .stats = NULL, - .formats = NULL, + .stats = c("unique", "nonunique", "unique_count"), + .stat_names = NULL, + .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = + "xx"), .labels = c(unique = "Number of patients with at least one event", nonunique = "Number of events"), - show_labels = c("default", "visible", "hidden"), - .indent_mods = 0L, - riskdiff = FALSE, - ... + .indent_mods = 0L ) summarize_num_patients( lyt, var, + na_str = default_na_str(), + riskdiff = FALSE, + ..., + na_rm = TRUE, required = NULL, count_by = NULL, unique_count_suffix = TRUE, - na_str = default_na_str(), - .stats = NULL, - .formats = NULL, + .stats = c("unique", "nonunique", "unique_count"), + .stat_names = NULL, + .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = + "xx"), .labels = c(unique = "Number of patients with at least one event", nonunique = "Number of events"), - .indent_mods = 0L, - riskdiff = FALSE, - ... + .indent_mods = 0L ) s_num_patients( x, - labelstr, + labelstr = "", .N_col, count_by = NULL, - unique_count_suffix = TRUE + unique_count_suffix = TRUE, + ... ) s_num_patients_content( @@ -56,7 +66,8 @@ s_num_patients_content( .var, required = NULL, count_by = NULL, - unique_count_suffix = TRUE + unique_count_suffix = TRUE, + ... ) } \arguments{ @@ -64,13 +75,11 @@ s_num_patients_content( \item{vars}{(\code{character})\cr variable names for the primary analysis variable to be iterated over.} -\item{required}{(\code{character} or \code{NULL})\cr name of a variable that is required to be non-missing.} +\item{var_labels}{(\code{character})\cr variable labels.} -\item{count_by}{(\code{character} or \code{NULL})\cr name of a variable to be combined with \code{vars} when counting -\code{nonunique} records.} - -\item{unique_count_suffix}{(\code{flag})\cr whether the \code{"(n)"} suffix should be added to \code{unique_count} labels. -Defaults to \code{TRUE}.} +\item{riskdiff}{(\code{flag})\cr whether a risk difference column is present. When set to \code{TRUE}, \code{\link[=add_riskdiff]{add_riskdiff()}} must be +used as \code{split_fun} in the prior column split of the table layout, specifying which columns should be compared. +See \code{\link[=stat_propdiff_ci]{stat_propdiff_ci()}} for details on risk difference calculation.} \item{na_str}{(\code{string})\cr string used to replace all \code{NA} or empty values in the output.} @@ -78,26 +87,41 @@ Defaults to \code{TRUE}.} possible (\code{TRUE}, the default) or as a new top-level element (\code{FALSE}). Ignored if it would nest a split. underneath analyses, which is not allowed.} +\item{table_names}{(\code{character})\cr this can be customized in the case that the same \code{vars} are analyzed multiple +times, to avoid warnings from \code{rtables}.} + +\item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} + +\item{section_div}{(\code{string})\cr string which should be repeated as a section divider after each group +defined by this split instruction, or \code{NA_character_} (the default) for no section divider.} + +\item{...}{additional arguments for the lower level functions.} + +\item{na_rm}{(\code{flag})\cr whether \code{NA} values should be removed from \code{x} prior to analysis.} + +\item{required}{(\code{character} or \code{NULL})\cr name of a variable that is required to be non-missing.} + +\item{count_by}{(\code{character} or \code{NULL})\cr name of a variable to be combined with \code{vars} when counting +\code{nonunique} records.} + +\item{unique_count_suffix}{(\code{flag})\cr whether the \code{"(n)"} suffix should be added to \code{unique_count} labels. +Defaults to \code{TRUE}.} + \item{.stats}{(\code{character})\cr statistics to select for the table. Options are: \verb{'unique', 'nonunique', 'unique_count'}} +\item{.stat_names}{(\code{character})\cr names of the statistics that are passed directly to name single statistics +(\code{.stats}). This option is visible when producing \code{\link[rtables:data.frame_export]{rtables::as_result_df()}} with \code{make_ard = TRUE}.} + \item{.formats}{(named \code{character} or \code{list})\cr formats for the statistics. See Details in \code{analyze_vars} for more information on the \code{"auto"} setting.} \item{.labels}{(named \code{character})\cr labels for the statistics (without indent).} -\item{show_labels}{(\code{string})\cr label visibility: one of "default", "visible" and "hidden".} - \item{.indent_mods}{(named \code{integer})\cr indent modifiers for the labels. Defaults to 0, which corresponds to the unmodified default behavior. Can be negative.} -\item{riskdiff}{(\code{flag})\cr whether a risk difference column is present. When set to \code{TRUE}, \code{\link[=add_riskdiff]{add_riskdiff()}} must be -used as \code{split_fun} in the prior column split of the table layout, specifying which columns should be compared. -See \code{\link[=stat_propdiff_ci]{stat_propdiff_ci()}} for details on risk difference calculation.} - -\item{...}{additional arguments for the lower level functions.} - \item{x}{(\code{character} or \code{factor})\cr vector of patient IDs.} \item{labelstr}{(\code{string})\cr label of the level of the parent split currently being summarized