#' Log marginal likelihood calculation
#'
#' Calculate the log marginal likelihood with bridge sampling (Meng & Wong,
#' 1996). This is a wrapper around [bridgesampling::bridge_sampler()].
#' Therefore, log marginal likelihood calculation is currently only available
#' for models estimated with `{rstan}` using MCMC.
#'
#' @param x A [measrdcm][dcm_estimate()] object estimated with
#'   `method = "mcmc"` and `backend = "rstan"`.
#' @param ... Unused.
#' @param force If the criterion has already been added to the
#'   model object with [add_criterion()], should it be recalculated? Default is
#'   `FALSE`.
#'
#' @return The estimate of the log marginal likelihood.
#' @export
#'
#' @references Meng, X.-L., & Wong, W. H. (1996). Simulating ratios of
#'   normalizing constants via a simple identity: A theoretical exploration.
#'   *Statistica Sinica, 6*(4), 831-860.
#'   <https://www.jstor.org/stable/24306045>
#'
#' @examplesIf measr_examples()
#' model_spec <- dcm_specify(qmatrix = dcmdata::mdm_qmatrix,
#'                           identifier = "item")
#' model <- dcm_estimate(dcm_spec = model_spec, data = dcmdata::mdm_data,
#'                       identifier = "respondent", method = "mcmc",
#'                       seed = 63277)
#'
#' log_mll(model)
log_mll <- S7::new_generic(
  "log_mll",
  "x",
  function(x, ..., force = FALSE) {
    S7::S7_dispatch()
  }
)

S7::method(log_mll, measrdcm) <- function(x, ..., force = FALSE) {
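  # Use the value cached by add_criterion() unless recalculation is forced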
  if (!rlang::is_empty(x@criteria$log_mll) && !force) {
    return(x@criteria$log_mll)
  }

  if (!(S7::S7_inherits(x@method, mcmc) && S7::S7_inherits(x@backend, rstan))) {
    cli::cli_abort(
      glue::glue(
        "{{.arg {rlang::caller_arg(x)}}} must be a model estimated with ",
        "{{.code method = \"mcmc\"}} and {{.code backend = \"rstan\"}} to ",
        "calculate the marginal likelihood"
      )
    )
  }

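  # Bridge sampling returns a list; $logml holds the log marginal likelihood
  # estimate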
  log_marg_lik <- bridgesampling::bridge_sampler(x@model, silent = TRUE)
  log_marg_lik$logml
}

#' Bayes factor for model comparisons
#'
#' Calculate the Bayes factor for model comparisons, which represents the
#' posterior odds of the null hypothesis when the prior probability of the null
#' model is 0.5 (Jeffreys, 1935; Kass & Raftery, 1995).
#' Consistent with the Bayesian reporting guidelines from Kruschke (2021), we
#' calculate the posterior probability of the null model for a variety of prior
#' probabilities, in addition to the Bayes factor.
#'
#' @param x A [measrdcm][dcm_estimate()] object.
#' @param ... Additional [measrdcm][dcm_estimate()] objects to be compared to
#'   `x`.
#' @param model_names Names given to each provided model in the comparison
#'   output. If `NULL` (the default), the names will be parsed from the names of
#'   the objects passed for comparison.
#' @param prior_prob A numeric vector of prior probabilities for the null model
#'   used to calculate the posterior probability of the null model relative to
#'   the alternative model. See details for more information.
#'
#' @details
#' Bayes factors will be calculated for all possible pairwise comparisons
#' between the models provided to `x` and `...`. In each comparison, one model
#' is identified as the null model, and the other is the alternative. This
#' distinction is not terribly meaningful from a calculation standpoint, as the
#' probabilities for the alternative model are simply 1 minus the null
#' probabilities. Which model is labeled the "null" in a given comparison is
#' determined by the order in which the models are passed to the function. That
#' is, `x` will always be the null model. The first model included in `...`
#' will be the alternative model when compared to `x` and the null model when
#' compared to all other models included in `...`. Similarly, the second model
#' included in `...` will be the alternative model when compared to `x` or the
#' first model in `...`, and the null model in all other comparisons.
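#'
#' For example, a hypothetical call like `bayes_factor(mod1, mod2, mod3)` would
#' produce comparisons of `mod1` vs. `mod2`, `mod1` vs. `mod3`, and `mod2` vs.
#' `mod3`, with the first model in each pair treated as the null model.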
#'
#' `prior_prob` is used to specify a vector of possible prior probabilities for
#' the null model. These are used in conjunction with the Bayes factor to
#' determine the posterior model probability for the null model, relative to the
#' alternative model. The posterior probability for the alternative model can
#' be calculated as 1 minus the null model's posterior probability. You may
#' specify a specific prior probability, or specify a range of possibilities to
#' construct a graph similar to Kruschke's (2021) Figure 1. These probabilities
#' can be interpreted as, "If the prior probability is \{`prior_prob_null`\},
#' then the posterior is \{`posterior_prob_null`\}" (or 1 minus for the
#' alternative model).
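#'
#' Concretely, for a Bayes factor \eqn{BF} in favor of the null model (the
#' ratio of the two marginal likelihoods) and a prior probability \eqn{p} for
#' the null model, the reported posterior probability of the null model is
#' \deqn{\frac{BF \times p}{BF \times p + (1 - p)}}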
#'
#' @concept Bayes
#'
#' @return A [tibble][tibble::tibble-package] with one row per model comparison
#'   and four columns.
#'   * `null_model`: The null model in the comparison.
#'   * `alt_model`: The alternative model in the comparison.
#'   * `bf`: The estimated Bayes factor.
#'   * `posterior_probs`: A nested list column, where each element is a
#'     tibble with two columns:
#'     * `prior_prob_null`: The prior probability that the null model is
#'       correct.
#'     * `posterior_prob_null`: The posterior probability that the null model is
#'       correct.
#'
#'     The list column can be unnested with [tidyr::unnest()] (see examples). If
#'     `prior_prob` is `NULL`, the `posterior_probs` column is excluded from
#'     the returned object.
#' @export
#'
#' @references Jeffreys, H. (1935). Some tests of significance, treated by the
#'   theory of probability. *Mathematical Proceedings of the Cambridge
#'   Philosophical Society, 31*(2), 203-222. \doi{10.1017/S030500410001330X}
#' @references Kass, R. E., & Raftery, A. E. (1995). Bayes factors.
#'   *Journal of the American Statistical Association, 90*(430), 773-795.
#'   \doi{10.1080/01621459.1995.10476572}
#' @references Kruschke, J. K. (2021). Bayesian analysis reporting guidelines.
#'   *Nature Human Behaviour, 5*, 1282-1291. \doi{10.1038/s41562-021-01177-7}
#'
#' @examplesIf measr_examples()
#' mdm_dina <- dcm_estimate(
#'   dcm_specify(dcmdata::mdm_qmatrix, identifier = "item",
#'               measurement_model = dina()),
#'   data = dcmdata::mdm_data, missing = NA, identifier = "respondent",
#'   method = "mcmc", seed = 63277, backend = "rstan",
#'   iter = 700, warmup = 500, chains = 2, refresh = 0
#' )
#'
#' mdm_dino <- dcm_estimate(
#'   dcm_specify(dcmdata::mdm_qmatrix, identifier = "item",
#'               measurement_model = dino()),
#'   data = dcmdata::mdm_data, missing = NA, identifier = "respondent",
#'   method = "mcmc", seed = 63277, backend = "rstan",
#'   iter = 700, warmup = 500, chains = 2, refresh = 0
#' )
#'
#' bf <- bayes_factor(mdm_dina, mdm_dino)
#' bf
#'
#' tidyr::unnest(bf, "posterior_probs")
bayes_factor <- S7::new_generic(
  "bayes_factor",
  "x",
  function(x, ..., model_names = NULL, prior_prob = seq(.02, .98, by = .02)) {
    for (i in seq_along(prior_prob)) {
      check_number_decimal(prior_prob[i], min = 0, max = 1, arg = "prior_prob")
    }

    S7::S7_dispatch()
  }
)

S7::method(bayes_factor, measrdcm) <- function(
  x,
  ...,
  model_names = NULL,
  prior_prob = seq(.02, .98, by = .02)
) {
  dots <- rlang::dots_list(..., .named = TRUE)
  dots_check <- vapply(dots, S7::S7_inherits, logical(1), class = measrdcm)
  if (!all(dots_check)) {
    msg <- paste(
      "{.arg {cli::cli_vec(names(dots)[!dots_check])}} must",
      "{?be a/be a/all be} {.cls measrdcm} object{?s}"
    )
    cli::cli_abort(msg)
  }
  all_models <- c(list(x), dots)

  if (is.null(model_names)) {
    model_names <- c(rlang::caller_arg(x), names(dots))
  } else if (length(model_names) != length(all_models)) {
    rdcmchecks::abort_bad_argument(
      arg = "model_names",
      must = glue::glue(
        "be of length {length(all_models)}, ",
        "the same as the number of models provided"
      ),
      not = length(model_names)
    )
  }

  all_models <- rlang::set_names(all_models, model_names)
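  # Build every pairwise comparison; the first model in each pair (V1) is
  # treated as the null model and the second (V2) as the alternative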
  bf <- combn(model_names, m = 2) |>
    t() |>
    as.data.frame() |>
    tibble::as_tibble() |>
    dplyr::rowwise() |>
    dplyr::mutate(
      mod1 = all_models[.data$V1],
      mod2 = all_models[.data$V2],
      bf = calc_bf(.data$mod1, .data$mod2)
    ) |>
    dplyr::ungroup() |>
    dplyr::select(null_model = "V1", alt_model = "V2", "bf")

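  # When prior probabilities are supplied, convert each Bayes factor into
  # posterior probabilities of the null model across those priors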
  if (!is.null(prior_prob)) {
    bf <- dplyr::mutate(
      bf,
      posterior_probs = mapply(
        calc_model_probabilities,
        all_models[.data$null_model],
        all_models[.data$alt_model],
        MoreArgs = list(prior_prob = prior_prob),
        SIMPLIFY = FALSE,
        USE.NAMES = FALSE
      )
    )
  }

  bf
}

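# Internal helper: Bayes factor in favor of `x` (null) over `y` (alternative),
# computed from the difference in log marginal likelihoods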
calc_bf <- function(x, y) {
  log_marg_lik1 <- log_mll(x)
  log_marg_lik2 <- log_mll(y)

  exp(log_marg_lik1 - log_marg_lik2)
}

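# Internal helper: posterior probability of the null model (`x`) relative to
# the alternative (`y`) for each supplied prior probability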
calc_model_probabilities <- function(x, y, prior_prob) {
  tibble::tibble(
    model_1 = log_mll(x),
    model_2 = log_mll(y),
    prior_prob = prior_prob
  ) |>
    dplyr::mutate(
      log_diff = (.data$model_1 + log(.data$prior_prob)) -
        (.data$model_2 + log(1 - .data$prior_prob)),
      posterior_prob = exp(.data$log_diff) / (1 + exp(.data$log_diff))
    ) |>
    dplyr::select(
      prior_prob_null = "prior_prob",
      posterior_prob_null = "posterior_prob"
    )
}