diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 7e6b4d4..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/R/caseduplicate.R b/R/caseduplicate.R index 37b4a21..f6d7f10 100644 --- a/R/caseduplicate.R +++ b/R/caseduplicate.R @@ -7,20 +7,33 @@ # (Miettinen/Schouten approach to directly estimating relative risks) estimate_duplicate <- function(formula, data, ...) { yvar <- as.character(all.vars(formula)[1]) - data <- data %>% - dplyr::mutate(.clusterid = dplyr::row_number()) - data <- dplyr::bind_rows(data, - data %>% - dplyr::rename(outc = dplyr::one_of(!!yvar)) %>% - dplyr::filter(.data$outc == 1) %>% - dplyr::mutate(outc = 0) %>% - dplyr::rename(!!yvar := "outc")) - fit <- eval(substitute(stats::glm(formula = formula, - family = binomial(link = "logit"), - data = data))) + + #TODO the following assumes that outc is coded as 0/1. We should throw an + # error when this is not true + data <- data |> + dplyr::mutate(.clusterid = dplyr::row_number()) |> + dplyr::rename(outc = dplyr::one_of(!!yvar)) |> + tidyr::uncount(outc + 1) |> + dplyr::group_by(.clusterid) |> + dplyr::mutate(outc = floor(outc / dplyr::row_number())) |> + dplyr::ungroup() |> + dplyr::rename(!!yvar := "outc") + + fit <- eval(substitute( + stats::glm( + formula = formula, + family = binomial(link = "logit"), + data = data + ) + )) class(fit) <- c("duplicate", class(fit)) - fit <- estimate_maxprob(fit = fit, formula = formula, data = data, - link = "logit") + fit <- estimate_maxprob( + fit = fit, + formula = formula, + data = data, + link = "logit" + ) + return(fit) } @@ -46,19 +59,24 @@ confint.duplicate <- function(object, parm = NULL, level = 0.95, ...) { # parm <- pnames[parm] a <- (1 - level)/2 a <- c(a, 1 - a) - pct <- paste(format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), - "%") + pct <- paste( + format(100 * a, trim = TRUE, scientific = FALSE, digits = 3), + "%" + ) fac <- qnorm(a) ci <- array(NA, dim = c(length(parm), 2L), dimnames = list(parm, pct)) # Robust covariance accounting for clustering obj_sandwich <- object class(obj_sandwich) <- "glm" - ses <- sqrt(diag(sandwich::sandwich( - x = obj_sandwich, - bread. = sandwich::bread(obj_sandwich), - meat. = sandwich::meatCL(obj_sandwich, - type = "HC0", - cluster = object$data$.clusterid)))) + ses <- sqrt(diag( + sandwich::sandwich( + x = obj_sandwich, + bread. = sandwich::bread(obj_sandwich), + meat. = sandwich::meatCL( + obj_sandwich, type = "HC0", cluster = object$data$.clusterid + ) + ) + )) ci[] <- cf[parm] + ses %o% fac return(ci) } @@ -77,13 +95,14 @@ confint.duplicate <- function(object, parm = NULL, level = 0.95, ...) { #' @param symbolic.cor Not used #' @param ... Other arguments, not used #' @export -summary.duplicate <- function(object, dispersion = NULL, correlation = FALSE, - symbolic.cor = FALSE, ...) { +summary.duplicate <- function( + object, dispersion = NULL, correlation = FALSE, symbolic.cor = FALSE, ... +) { # a modification of summary.glm() est.disp <- FALSE df.r <- object$df.residual - if (is.null(dispersion)) - dispersion <- if (object$family$family %in% c("poisson", "binomial")) + if(is.null(dispersion)) + dispersion <- if(object$family$family %in% c("poisson", "binomial")) 1 else if (df.r > 0) { est.disp <- TRUE @@ -111,9 +130,9 @@ summary.duplicate <- function(object, dispersion = NULL, correlation = FALSE, covmat.unscaled <- sandwich::sandwich( x = obj_sandwich, bread. = sandwich::bread(x = obj_sandwich), - meat. = sandwich::meatCL(x = obj_sandwich, - type = "HC0", - cluster = object$data$.clusterid)) + meat. = sandwich::meatCL( + x = obj_sandwich, type = "HC0", cluster = object$data$.clusterid) + ) ###### end changes ######## covmat <- dispersion * covmat.unscaled var.cf <- diag(covmat) @@ -123,26 +142,29 @@ summary.duplicate <- function(object, dispersion = NULL, correlation = FALSE, if (!est.disp) { pvalue <- 2 * pnorm(-abs(tvalue)) coef.table <- cbind(coef.p, s.err, tvalue, pvalue) - dimnames(coef.table) <- list(names(coef.p), c(dn, - "z value", "Pr(>|z|)")) + dimnames(coef.table) <- list( + names(coef.p), c(dn, "z value", "Pr(>|z|)") + ) } else if (df.r > 0) { pvalue <- 2 * pt(-abs(tvalue), df.r) coef.table <- cbind(coef.p, s.err, tvalue, pvalue) - dimnames(coef.table) <- list(names(coef.p), c(dn, - "t value", "Pr(>|t|)")) + dimnames(coef.table) <- list( + names(coef.p), c(dn, "t value", "Pr(>|t|)") + ) } else { coef.table <- cbind(coef.p, NaN, NaN, NaN) - dimnames(coef.table) <- list(names(coef.p), c(dn, - "t value", "Pr(>|t|)")) + dimnames(coef.table) <- list( + names(coef.p), c(dn, "t value", "Pr(>|t|)") + ) } df.f <- NCOL(Qr$qr) } else { - coef.table <- matrix(, 0L, 4L) + coef.table <- matrix(data = NA, 0L, 4L) dimnames(coef.table) <- list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) - covmat.unscaled <- covmat <- matrix(, 0L, 0L) + covmat.unscaled <- covmat <- matrix(data = NA, 0L, 0L) df.f <- length(aliased) } keep <- match(c("call", "terms", "family", "deviance", "aic", diff --git a/R/estimate_risk.R b/R/estimate_risk.R index 26ce178..323eef3 100644 --- a/R/estimate_risk.R +++ b/R/estimate_risk.R @@ -163,10 +163,12 @@ riskratio <- function( "margstd_boot", "margstd_delta", "logistic", - "legacy"), + "legacy" + ), variable = NULL, at = NULL, - ...) { + ... +) { estimate_risk( formula = formula, data = data, @@ -174,7 +176,8 @@ riskratio <- function( approach = approach, variable = variable, at = at, - ...) + ... + ) } #' @describeIn riskratio Fit risk difference models @@ -192,9 +195,12 @@ riskdiff <- function( "glm_cem_startp", "margstd_boot", "margstd_delta", - "legacy"), + "legacy" + ), variable = NULL, - at = NULL, ...) { + at = NULL, + ... +) { estimate_risk( formula = formula, data = data, @@ -202,7 +208,8 @@ riskdiff <- function( approach = approach, variable = variable, at = at, - ...) + ... + ) } # Workhorse for riskratio and riskdiff @@ -213,393 +220,495 @@ estimate_risk <- function( approach, variable = NULL, at = NULL, - ...) { + ... +) { implausible <- 0.99999 estimand <- match.arg(estimand) - link <- switch( - EXPR = estimand[1], - rr = "log", - rd = "identity") - if(link == "log") + + if(estimand[1] == "rr") { + link = "log" possible_approaches <- as.character(as.list( args(risks::riskratio))$approach)[-1] - else + } + if(estimand[1] == "rd") { + link = "identity" possible_approaches <- as.character(as.list( args(risks::riskdiff))$approach)[-1] + } + if(!(approach[1] %in% possible_approaches)) - stop(paste0( + rlang::abort(paste0( "Approach '", approach[1], "' is not implemented. ", "Available are: ", paste(possible_approaches, sep = ", ", collapse = ", "), - ".")) - - fit <- switch( - EXPR = approach[1], - # Automated model fitting, new approach, always choosing consistent model - auto = { - # 1) check if marginal standardization with delta CIs is feasible - fit <- possibly_estimate_margstd_delta( - formula = formula, - data = data, - estimand = estimand, - variable = variable, - at = at, - interaction_warning = FALSE, - ...) - if(fit$converged == TRUE & - fit$maxprob < implausible & - fit$boundary == FALSE & - fit$margstd_delta_interaction == FALSE) - return(fit) - - # 2) default to marginal standardization with bootstrap CIs - fit <- possibly_estimate_margstd_boot( - formula = formula, - data = data, - estimand = estimand, - variable = variable, - at = at, - ...) - - if(fit$converged == TRUE & - fit$maxprob < implausible & - fit$boundary == FALSE) - return(fit) - - # 3) Check if a logistic model can be fitted - fit <- stats::glm( - formula = formula, - data = data, - family = stats::binomial(link = "logit")) - # Typically, execution will stop with a non-converged logistic model. - # If, surprisingly, only a logistic model converges, return an error. - stop(paste( - "No model besides the logistic model converged and had", - "within-range predicted probabilities of < 1.")) - }, - # Automated model fitting, legacy approach, choosing different models - legacy = { - # 1) try regular GLM with Fisher scoring - fit_glm <- possibly_estimate_glm( + "." + )) + + # Automated model fitting, new approach, always choosing consistent model + if(approach[1] == "auto") { + + # 1) check if marginal standardization with delta CIs is feasible + fit <- possibly_estimate_margstd_delta( + formula = formula, + data = data, + estimand = estimand, + variable = variable, + at = at, + interaction_warning = FALSE, + ... + ) + if( + fit$converged == TRUE & + fit$maxprob < implausible & + fit$boundary == FALSE & + fit$margstd_delta_interaction == FALSE + ) {return(fit)} + + # 2) default to marginal standardization with bootstrap CIs + fit <- possibly_estimate_margstd_boot( + formula = formula, + data = data, + estimand = estimand, + variable = variable, + at = at, + ... + ) + if( + fit$converged == TRUE & + fit$maxprob < implausible & + fit$boundary == FALSE + ) {return(fit)} + + # 3) Check if a logistic model can be fitted + fit <- stats::glm( + formula = formula, + data = data, + family = stats::binomial(link = "logit") + ) + # Typically, execution will stop with a non-converged logistic model. + # If, surprisingly, only a logistic model converges, return an error. + rlang::abort(paste( + "No model besides the logistic model converged and had", + "within-range predicted probabilities of < 1." + )) + return(fit) + } + + # Automated model fitting, legacy approach, choosing different models + if(approach[1] == "legacy") { + + # 1) try regular GLM with Fisher scoring + fit_glm <- possibly_estimate_glm( + formula = formula, + data = data, + link = link, + ... + ) + if( + fit_glm$converged == TRUE & + fit_glm$maxprob < implausible & + fit_glm$boundary == FALSE + ) {return(fit_glm)} + + # 2) try GLM with starting values from Poisson for RRs only + if(link == "log") { + fit_poisson <- possibly_estimate_poisson( formula = formula, data = data, link = link, - ...) - if(fit_glm$converged == TRUE & - fit_glm$maxprob < implausible & - fit_glm$boundary == FALSE) - return(fit_glm) - - # 2) try GLM with starting values from Poisson for RRs only - if(link == "log") { - fit_poisson <- possibly_estimate_poisson( + ... + ) + if(fit_poisson$converged == TRUE) { + fit_glm_start <- possibly_estimate_glm_startp( formula = formula, data = data, link = link, - ...) - if(fit_poisson$converged == TRUE) { - fit_glm_start <- possibly_estimate_glm_startp( - formula = formula, - data = data, - link = link, - start = coef(fit_poisson), - start_type = "p", - ...) - if(fit_glm_start$converged == TRUE & - fit_glm_start$maxprob < implausible & - fit_glm_start$boundary == FALSE) - return(fit_glm_start) - } + start = coef(fit_poisson), + start_type = "p", + ... + ) + if( + fit_glm_start$converged == TRUE & + fit_glm_start$maxprob < implausible & + fit_glm_start$boundary == FALSE + ) {return(fit_glm_start)} } + } - # 3) Try marginal standardization with delta method SEs - fit_margstd_delta <- possibly_estimate_margstd_delta( - formula = formula, - data = data, - estimand = estimand, - variable = variable, - at = at, - interaction_warning = FALSE, - ...) - if(fit_margstd_delta$converged == TRUE & - fit_margstd_delta$maxprob < implausible & - fit_margstd_delta$boundary == FALSE & - fit_margstd_delta$margstd_delta_interaction == FALSE) - return(fit_margstd_delta) - - # 4) try marginal standardization with bootstrap SEs - fit <- possibly_estimate_margstd_boot( - formula = formula, - data = data, - estimand = estimand, - variable = variable, - at = at, - ...) - - if(fit$converged == TRUE & - fit$maxprob < implausible & - fit$boundary == FALSE) - return(fit) - - # 4) Check if a logistic model can be fitted - res <- stats::glm( - formula = formula, - data = data, - family = stats::binomial(link = "logit")) - # Typically, execution will stop with a non-converged logistic model. - # If, surprisingly, only a logistic model converges, return an error. - stop(paste( - "No model besides the logistic model converged and had", - "within-range predicted probabilities of < 1.")) - }, - - # All models requested to fit - all = { - fit1 <- possibly_estimate_poisson( + # 3) Try marginal standardization with delta method SEs + fit_margstd_delta <- possibly_estimate_margstd_delta( + formula = formula, + data = data, + estimand = estimand, + variable = variable, + at = at, + interaction_warning = FALSE, + ... + ) + if( + fit_margstd_delta$converged == TRUE & + fit_margstd_delta$maxprob < implausible & + fit_margstd_delta$boundary == FALSE & + fit_margstd_delta$margstd_delta_interaction == FALSE + ) {return(fit_margstd_delta)} + + # 4) try marginal standardization with bootstrap SEs + fit <- possibly_estimate_margstd_boot( + formula = formula, + data = data, + estimand = estimand, + variable = variable, + at = at, + ... + ) + if( + fit$converged == TRUE & + fit$maxprob < implausible & + fit$boundary == FALSE + ) {return(fit)} + + # 4) Check if a logistic model can be fitted + res <- stats::glm( + formula = formula, + data = data, + family = stats::binomial(link = "logit") + ) + # Typically, execution will stop with a non-converged logistic model. + # If, surprisingly, only a logistic model converges, return an error. + rlang::abort(paste( + "No model besides the logistic model converged and had", + "within-range predicted probabilities of < 1." + )) + } + + if(approach[1] == "all") { + + fit1 <- possibly_estimate_poisson( + formula = formula, + data = data, + link = link, + ... + ) + + fit2 <- possibly_estimate_glm( + formula = formula, + data = data, + link = link, + ... + ) + + if(!is.null(coef(fit1))) # attempt only if Poisson converged + fit3 <- possibly_estimate_glm_startp( formula = formula, data = data, link = link, - ...) + start = coef(fit1), + start_type = "p", + ... + ) + else # make possibly_estimate_glm return a non-converged object + fit3 <- possibly_estimate_glm_startp( + formula = "nonsense", + data = "nodata", + start_type = "p" + ) - fit2 <- possibly_estimate_glm( + if(link == "log") + fit4 <- possibly_estimate_logbin( formula = formula, data = data, - link = link, - ...) + ... + ) + if(link == "identity") + fit4 <- possibly_estimate_addreg( + formula = formula, + data = data, + ... + ) - if(!is.null(coef(fit1))) # attempt only if Poisson converged - fit3 <- possibly_estimate_glm_startp( + if(link == "log") { + if(!is.null(coef(fit1))) # attempt only if Poisson converged + fit5 <- possibly_estimate_logbin( formula = formula, data = data, - link = link, start = coef(fit1), - start_type = "p", - ...) - else # make possibly_estimate_glm return a non-converged object - fit3 <- possibly_estimate_glm_startp( + ... + ) + #TODO does this do anything or can we give a meaningful message? + else + fit5 <- possibly_estimate_logbin( formula = "nonsense", - data = "nodata", - start_type = "p") + data = "nodata" + ) + } - if(link == "log") - fit4 <- possibly_estimate_logbin( + if(link == "identity") { + if(!is.null(coef(fit1))) # attempt only if Poisson converged + fit5 <- possibly_estimate_addreg( formula = formula, data = data, - ...) + start = coef(fit1), + ... + ) + #TODO if this does the same as above, we can move both out of the + # "if" sections like the converged check below else - fit4 <- possibly_estimate_addreg( - formula = formula, - data = data, - ...) - - if(link == "log") { - if(!is.null(coef(fit1))) # attempt only if Poisson converged - fit5 <- possibly_estimate_logbin( - formula = formula, - data = data, - start = coef(fit1), - ...) - else - fit5 <- possibly_estimate_logbin( - formula = "nonsense", - data = "nodata") - if(fit5$converged == FALSE) - fit5$risks_start = "_start" - } else { - if(!is.null(coef(fit1))) # attempt only if Poisson converged - fit5 <- possibly_estimate_addreg( - formula = formula, - data = data, - start = coef(fit1), - ...) - else - fit5 <- possibly_estimate_addreg( - formula = "nonsense", - data = "nodata") - if(fit5$converged == FALSE) - fit5$risks_start = "_start" - } + fit5 <- possibly_estimate_addreg( + formula = "nonsense", + data = "nodata" + ) + } - fit6 <- possibly_estimate_margstd_boot( + if(fit5$converged == FALSE) + fit5$risks_start = "_start" + + fit6 <- possibly_estimate_margstd_boot( + formula = formula, + data = data, + estimand = estimand, + variable = variable, + at = at, + ... + ) + + fit7 <- possibly_estimate_margstd_delta( + formula = formula, + data = data, + estimand = estimand, + variable = variable, + at = at, + ... + ) + + # If RR requested, add on case-duplication model and, for comparison, + # the plain logistic model + if(estimand == "rr") { + + fit8 <- possibly_estimate_logistic( formula = formula, data = data, - estimand = estimand, - variable = variable, - at = at, - ...) + ... + ) - fit7 <- possibly_estimate_margstd_delta( + fit9 <- possibly_estimate_duplicate( formula = formula, data = data, - estimand = estimand, - variable = variable, - at = at, - ...) - - # If RR requested, add on case-duplication model and, for comparison, - # the plain logistic model - if(estimand == "rr") { - fit8 <- possibly_estimate_logistic( - formula = formula, - data = data, - ...) - fit9 <- possibly_estimate_duplicate( + ... + ) + + if(!is.null(coef(fit9))) # attempt only if 'duplicate' converged + fit10 <- possibly_estimate_glm_startd( formula = formula, data = data, - ...) - - if(!is.null(coef(fit9))) # attempt only if 'duplicate' converged - fit10 <- possibly_estimate_glm_startd( - formula = formula, - data = data, - link = link, - start = coef(fit9), - start_type = "d", - ...) - else # make possibly_estimate_glm return a non-converged object - fit10 <- possibly_estimate_glm_startd( - formula = "nonsense", - data = "nodata", - start_type = "d") - - fit1$all_models = list( - robpoisson = fit1, - glm = fit2, - glm_startp = fit3, - glm_cem = fit4, - glm_cem_startp = fit5, - margstd_boot = fit6, - margstd_delta = fit7, - logistic = fit8, - duplicate = fit9, - glm_startd = fit10) - } else - fit1$all_models = list( - robpoisson = fit1, - glm = fit2, - glm_start = fit3, - glm_cem = fit4, - glm_cem_startp = fit5, - margstd_boot = fit6, - margstd_delta = fit7) - fit1 - }, - - # Specific models that were directly requested - robpoisson = estimate_poisson( + link = link, + start = coef(fit9), + start_type = "d", + ... + ) + #TODO seeing a pattern: let's make returning a non-converged object + # an internal function that can be used in all cases like this + else # make possibly_estimate_glm return a non-converged object + fit10 <- possibly_estimate_glm_startd( + formula = "nonsense", + data = "nodata", + start_type = "d" + ) + + fit1$all_models = list( + robpoisson = fit1, + glm = fit2, + glm_startp = fit3, + glm_cem = fit4, + glm_cem_startp = fit5, + margstd_boot = fit6, + margstd_delta = fit7, + logistic = fit8, + duplicate = fit9, + glm_startd = fit10) + # TODO the below else is a great example of why they are best avoided + # in favor of explicit "if" conditions. I needed to jump way up to find + # that this is the "else" that happens when estimand is not equal to "rr". + # What else can it be? Let's just spell it clearly here instead of else. + } else + fit1$all_models = list( + robpoisson = fit1, + glm = fit2, + glm_start = fit3, + glm_cem = fit4, + glm_cem_startp = fit5, + margstd_boot = fit6, + margstd_delta = fit7) + #TODO I think this is supposed to be a return()? + fit1 + } + + if(approach[1] == "robpoisson") { + fit <- estimate_poisson( formula = formula, data = data, link = link, - ...), + ... + ) + } + + #TODO I think duplicate is only valid for RRs? If so we should add a check + if(approach[1] == "duplicate") { + #TODO I think this is supposed to be assigning to the object "fit" or did I + # lose something in the refactor? duplicate = estimate_duplicate( formula = formula, data = data, - ...), - glm = estimate_glm( + ... + ) + } + + if(approach[1] == "glm") { + fit <- estimate_glm( formula = formula, data = data, link = link, - ...), - glm_startp = { - fit_poisson <- estimate_poisson( - formula = formula, - data = data, - link = link, - ...) - estimate_glm( + ... + ) + } + + if(approach[1] == "glm_startp") { + fit_poisson <- estimate_poisson( + formula = formula, + data = data, + link = link, + ... + ) + fit <- estimate_glm( + formula = formula, + data = data, + link = link, + start = coef(fit_poisson), + start_type = "p", + ... + ) + } + + if(approach[1] == "glm_startd") { + fit_duplicate <- estimate_duplicate( + formula = formula, + data = data, + ... + ) + fit <- estimate_glm( + formula = formula, + data = data, + link = link, + start = coef(fit_duplicate), + start_type = "d", + ... + ) + } + + if(approach[1] == "glm_cem") { +#TODO why not just require logbin and addreg as imports? They seem pretty +# fundamental to key functions in this package. Then these error messages could +# be removed + if(link == "log") { + if(!requireNamespace("logbin", quietly = TRUE)) + rlang::abort(paste( + "For this approach, the 'logbin' package must be installed:", + 'install.packages("logbin")'), + call. = FALSE + ) + fit <- estimate_logbin( formula = formula, data = data, - link = link, - start = coef(fit_poisson), - start_type = "p", - ...) - }, - glm_startd = { - fit_duplicate <- estimate_duplicate( + ... + ) + } + + if(link == "identity") { + if(!requireNamespace("addreg", quietly = TRUE)) + rlang::abort(paste( + "For this approach, the 'addreg' package must be installed:", + 'install.packages("addreg")'), + call. = FALSE + ) + fit <- estimate_addreg( formula = formula, data = data, - ...) - estimate_glm( + ... + ) + } + } + + if(approach[1] == "glm_cem_startp") { + + fit_poisson <- estimate_poisson( + formula = formula, + data = data, + link = link, + ... + ) + + #TODO possibly remove error messaging as above + if(link == "log") { + if(!requireNamespace("logbin", quietly = TRUE)) + rlang::abort(paste( + "For this approach, the 'logbin' package must be installed:", + 'install.packages("logbin")'), + call. = FALSE + ) + fit <- estimate_logbin( formula = formula, data = data, - link = link, - start = coef(fit_duplicate), - start_type = "d", - ...) - }, - glm_cem = { - if(link == "log") { - if(!requireNamespace("logbin", quietly = TRUE)) - stop(paste( - "For this approach, the 'logbin' package must be installed:", - 'install.packages("logbin")'), - call. = FALSE) - estimate_logbin( - formula = formula, - data = data, - ...) - } else { - if(!requireNamespace("addreg", quietly = TRUE)) - stop(paste( - "For this approach, the 'addreg' package must be installed:", - 'install.packages("addreg")'), - call. = FALSE) - estimate_addreg( - formula = formula, - data = data, - ...) - } - }, - glm_cem_startp = { - fit_poisson <- estimate_poisson( + start = coef(fit_poisson), + start_type = "p", + ... + ) + } + + if(link == "identity") { + if(!requireNamespace("addreg", quietly = TRUE)) + stop(paste( + "For this approach, the 'addreg' package must be installed:", + 'install.packages("addreg")'), + call. = FALSE + ) + fit <- estimate_addreg( formula = formula, data = data, - link = link, - ...) - if(link == "log") { - if(!requireNamespace("logbin", quietly = TRUE)) - stop(paste( - "For this approach, the 'logbin' package must be installed:", - 'install.packages("logbin")'), - call. = FALSE) - estimate_logbin( - formula = formula, - data = data, - start = coef(fit_poisson), - start_type = "p", - ...) - } else { - if(!requireNamespace("addreg", quietly = TRUE)) - stop(paste( - "For this approach, the 'addreg' package must be installed:", - 'install.packages("addreg")'), - call. = FALSE) - estimate_addreg( - formula = formula, - data = data, - start = coef(fit_poisson), - start_type = "p", - ...) - } - }, - margstd_boot = estimate_margstd_boot( + start = coef(fit_poisson), + start_type = "p", + ... + ) + } + } + + if(approach[1] == "margstd_boot") { + fit <- estimate_margstd_boot( formula = formula, data = data, estimand = estimand, variable = variable, at = at, - ...), - margstd_delta = estimate_margstd_delta( + ... + ) + } + + if(approach[1] == "margstd_delta") { + fit <- estimate_margstd_delta( formula = formula, data = data, estimand = estimand, variable = variable, at = at, - ...), - logistic = estimate_logistic( + ... + ) + } + + if(approach[1] == "logistic") { + fit <- estimate_logistic( formula = formula, data = data, - ...) - ) + ... + ) + } + return(fit) } diff --git a/vignettes/margstd.Rmd b/vignettes/margstd.Rmd index 95e9aa8..0924190 100644 --- a/vignettes/margstd.Rmd +++ b/vignettes/margstd.Rmd @@ -27,15 +27,16 @@ We use the same example data as in the [Get Started vignette](risks.html#an-exam library(risks) # provides riskratio(), riskdiff(), postestimation functions library(dplyr) # For data handling library(broom) # For tidy() model summaries -data(breastcancer) ``` We fit the same risk difference model as in the [Get Started vignette](risks.html#adjusted-risk-differences), this time explicitly specifying `approach = "margstd_delta"`: ```{r margstd} -fit_margstd <- riskdiff(formula = death ~ stage + receptor, - data = breastcancer, - approach = "margstd_delta") +fit_margstd <- riskdiff( + formula = death ~ stage + receptor, + data = breastcancer, + approach = "margstd_delta" +) summary(fit_margstd) ``` diff --git a/vignettes/models.Rmd b/vignettes/models.Rmd index c47ff5e..25acf0b 100644 --- a/vignettes/models.Rmd +++ b/vignettes/models.Rmd @@ -50,24 +50,27 @@ We load the same example data as in the [Get Started vignette](risks.html#an-exa library(risks) # provides riskratio(), riskdiff(), postestimation functions library(dplyr) # For data handling library(broom) # For tidy() model summaries -data(breastcancer) ``` We then select a binomial model with starting values from the Poisson model: ```{r selectapproach} -riskratio(formula = death ~ stage + receptor, - data = breastcancer, - approach = "glm_startp") +riskratio( + formula = death ~ stage + receptor, + data = breastcancer, + approach = "glm_startp" +) ``` \ However, the binomial model without starting values does not converge: ```{r selectapproach2, error = TRUE} -riskratio(formula = death ~ stage + receptor, - data = breastcancer, - approach = "glm") +riskratio( + formula = death ~ stage + receptor, + data = breastcancer, + approach = "glm" +) ``` @@ -77,9 +80,11 @@ riskratio(formula = death ~ stage + receptor, With `approach = "all"`, all model types listed in the tables are fitted. The fitted object, *e.g.*, `fit`, is one of the converged models. A summary of the convergence status of all models is displayed at the beginning of `summary(fit)`: ```{r allmodels} -fit_all <- riskratio(formula = death ~ stage + receptor, - data = breastcancer, - approach = "all") +fit_all <- riskratio( + formula = death ~ stage + receptor, + data = breastcancer, + approach = "all" +) summary(fit_all) ``` @@ -87,7 +92,7 @@ summary(fit_all) Individual models can be accessed as `fit$all_models[[1]]` through `fit$all_models[[6]]` (or `[[7]]` if fitting a risk ratio model). `tidy()` shows coefficients and confidence intervals from all models that converged: ```{r allmodels2} -tidy(fit_all) %>% - select(-statistic, -p.value) %>% +tidy(fit_all) |> + select(-statistic, -p.value) |> print(n = 50) ``` diff --git a/vignettes/risks.Rmd b/vignettes/risks.Rmd index 2838d04..b5f9e59 100644 --- a/vignettes/risks.Rmd +++ b/vignettes/risks.Rmd @@ -32,11 +32,14 @@ library(risks) # provides riskratio(), riskdiff(), postestimation functions library(dplyr) # For data handling library(broom) # For tidy() model summaries -data(breastcancer) # Load sample data - -breastcancer %>% # Display the sample data - group_by(receptor, stage) %>% - summarize(deaths = sum(death), total = n(), risk = deaths/total) +breastcancer |> # Summarize the sample data + group_by(receptor, stage) |> + summarize( + deaths = sum(death), + total = n(), + risk = deaths/total, + .groups = "keep" + ) ``` @@ -45,7 +48,7 @@ breastcancer %>% # Display the sample data The risk of death is higher among women with higher-stage and hormone receptor-low cancers, which also tend to be of higher stage. Using `risks` models to obtain (possibly multivariable-adjusted) risk ratios or risk differences is similar to the standard code for logistic models in R. As customary in R, log(RR) is returned; see below for how to obtain exponentiated values. No options for model `family` or `link` need to be supplied: ```{r basic} -fit_rr <- riskratio(formula = death ~ stage + receptor, data = breastcancer) +fit_rr <- riskratio(death ~ stage + receptor, data = breastcancer) summary(fit_rr) ``` @@ -55,7 +58,7 @@ summary(fit_rr) To obtain risk differences, use `riskdiff`, which has the same syntax: ```{r basic2} -fit_rd <- riskdiff(formula = death ~ stage + receptor, data = breastcancer) +fit_rd <- riskdiff(death ~ stage + receptor, data = breastcancer) summary(fit_rd) ``` @@ -76,7 +79,7 @@ tidy(fit_rd) ``` \ -In accordance with `glm()` standards, coefficients for relative risks are shown on the logarithmic scale. Exponentiated coefficients for risk ratios are easily obtained via `tidy(..., exponentiate = TRUE)`: +In accordance with `glm()` standards, coefficients for relative risks are shown on the logarithmic scale. Exponentiated coefficients for risk ratios are obtained via `tidy(..., exponentiate = TRUE)`: ```{r basic4} tidy(fit_rr, exponentiate = TRUE)