Skip to content

Commit 15fc594

Browse files
Merge pull request #67 from JeffreyCHoover/bayes-factor-v2
Bayes factor v2
2 parents 811c1f1 + 8a5eac5 commit 15fc594

21 files changed

+579
-26
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ BugReports: https://github.com/wjakethompson/measr/issues
2626
Depends:
2727
R (>= 4.1.0)
2828
Imports:
29+
bridgesampling,
2930
cli,
3031
dcm2,
3132
dcmstan (>= 0.0.0.9000),

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export(aic)
1717
export(as_draws)
1818
export(as_label)
1919
export(as_name)
20+
export(bayes_factor)
2021
export(bic)
2122
export(cdi)
2223
export(cmdstanr)
@@ -36,6 +37,7 @@ export(gqs)
3637
export(hdcm)
3738
export(independent)
3839
export(lcdm)
40+
export(log_mll)
3941
export(loglik_array)
4042
export(loo)
4143
export(loo_compare)
@@ -68,6 +70,7 @@ import(Rcpp)
6870
import(dcmstan)
6971
import(methods)
7072
import(rlang)
73+
importFrom(bridgesampling,bridge_sampler)
7174
importFrom(dcm2,fit_m2)
7275
importFrom(dcmstan,create_profiles)
7376
importFrom(dcmstan,crum)

NEWS.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,15 @@
66

77
* Added `aic()` and `bic()` functions for calculating the Akaike and Bayesian information criteria, respectively, for models estimated with `method = "optim"`.
88

9-
* Refactored package to use S7 objects instead of S3
9+
* Refactored package to use S7 objects instead of S3.
1010

11-
* Functions for generating Stan code have been relocated to dcmstan
11+
* Functions for generating Stan code have been relocated to dcmstan.
1212

13-
* `measr_dcm()` deprecated in favor of `dcm_estimate()`
13+
* `measr_dcm()` deprecated in favor of `dcm_estimate()`.
14+
15+
* New functionality for relative model fit comparisons (`aic()`, `bic()`, `bayes_factor()`).
16+
17+
* New functionality for testing model assumptions (`yens_q3()`).
1418

1519
# measr 1.0.0
1620

R/add-model-evaluation.R

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@
1111
#' @inheritParams score
1212
#' @param x A [measrdcm][dcm_estimate()] object.
1313
#' @param criterion A vector of information criteria to calculate and add to the
14-
#' model object. Must be `"loo"` or `"waic"` for models estimated with MCMC,
15-
#' or `"aic"` or `"bic"` for models estimated with the optimizer.
14+
#' model object. Must be `"loo"`, `"waic"`, or `"log_mll"` for models
15+
#' estimated with MCMC, or `"aic"` or `"bic"` for models estimated with the
16+
#' optimizer.
1617
#' @param method A vector of model fit methods to evaluate and add to the model
1718
#' object.
1819
#' @param overwrite Logical. Indicates whether specified elements that have
@@ -38,9 +39,10 @@
3839
#' Thompson, 2019), depending on which methods are specified.
3940
#'
4041
#' For `add_criterion()`, relative fit criteria are added to the `$criteria`
41-
#' element of the fitted model. This function wraps [loo()] or [waic()] to
42-
#' calculate the LOO-CV (Vehtari et al., 2017) or WAIC (Watanabe, 2010),
43-
#' respectively, for models estimated with MCMC.
42+
#' element of the fitted model. For models estimated with MCMC, this function
43+
#' wraps [loo()] or [waic()] to calculate the LOO-CV (Vehtari et al., 2017) or
44+
#' WAIC (Watanabe, 2010), respectively, or [log_mll()] to calculate the log
45+
#' marginal likelihood, which is used for calculating Bayes factors.
4446
#' For models estimated with the optimizer, this wraps [aic()] or [bic()]
4547
#' to estimate the AIC (Akaike, 1973) or BIC (Schwarz, 1978), respectively.
4648
#'
@@ -124,7 +126,7 @@ NULL
124126
#' @rdname model_evaluation
125127
add_criterion <- function(
126128
x,
127-
criterion = c("loo", "waic", "aic", "bic"),
129+
criterion = c("loo", "waic", "log_mll", "aic", "bic"),
128130
overwrite = FALSE,
129131
save = TRUE,
130132
...,
@@ -133,7 +135,7 @@ add_criterion <- function(
133135
rdcmchecks::check_S7(x, class = "measrfit")
134136
criterion <- rlang::arg_match(
135137
criterion,
136-
values = c("loo", "waic", "aic", "bic"),
138+
values = c("loo", "waic", "log_mll", "aic", "bic"),
137139
multiple = TRUE
138140
)
139141
check_bool(overwrite)
@@ -149,16 +151,19 @@ add_criterion <- function(
149151
all_criteria <- c(new_criteria, redo_criteria)
150152

151153
if ("loo" %in% all_criteria) {
152-
x@criteria$loo <- loo(x, r_eff = r_eff, ...)
154+
x@criteria$loo <- loo(x, r_eff = r_eff, ..., force = TRUE)
153155
}
154156
if ("waic" %in% all_criteria) {
155-
x@criteria$waic <- waic(x, ...)
157+
x@criteria$waic <- waic(x, ..., force = TRUE)
158+
}
159+
if ("log_mll" %in% all_criteria) {
160+
x@criteria$log_mll <- log_mll(x, force = TRUE)
156161
}
157162
if ("aic" %in% all_criteria) {
158-
x@criteria$aic <- aic(x)
163+
x@criteria$aic <- aic(x, force = TRUE)
159164
}
160165
if ("bic" %in% all_criteria) {
161-
x@criteria$bic <- bic(x)
166+
x@criteria$bic <- bic(x, force = TRUE)
162167
}
163168

164169
# re-save model object (if applicable) ---------------------------------------

R/measr-package.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#' @importFrom glue glue
1212
#' @importFrom lifecycle deprecated
1313
#' @importFrom rstan sampling
14+
#' @importFrom bridgesampling bridge_sampler
1415
#' @importFrom tibble tibble
1516
#' @useDynLib measr, .registration = TRUE
1617
## usethis namespace: end

R/zxx-class-stan-classes.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
#' the [CmdStanModel][cmdstanr::CmdStanModel] class when using `{cmdstanr}`.
3636
#' All method classes inherit from `measr::stanmethod`.
3737
#'
38-
#' @return An [S7 object][S7_object()] with the corresponding class.
38+
#' @return An [S7 object][S7::S7_object()] with the corresponding class.
3939
#' @rdname stan-classes
4040
#' @name stan-classes
4141
#' @examples

R/zzz-methods-aic-bic.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#' *The Annals of Statistics, 6*(2), 461–464. \doi{10.1214/aos/1176344136}
2525
#'
2626
#' @examplesIf measr_examples()
27-
#' # example code
2827
#' model_spec <- dcm_specify(qmatrix = dcmdata::mdm_qmatrix,
2928
#' identifier = "item")
3029
#' model <- dcm_estimate(dcm_spec = model_spec, data = dcmdata::mdm_data,

R/zzz-methods-bayes-factor.R

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
#' Log marginal likelihood calculation
2+
#'
3+
#' Calculate the log marginal likelihood with bridge sampling (Meng & Wong,
4+
#' 1996). This is a wrapper around [bridgesampling::bridge_sampler()].
5+
#' Therefore, log marginal likelihood calculation is currently only available
6+
#' for models estimated with `{rstan}` using MCMC.
7+
#'
8+
#' @param x A [measrdcm][dcm_estimate()] object estimated with
9+
#' `backend = "optim"`.
10+
#' @param ... Unused.
11+
#' @param force If the criterion has already been added to the
12+
#' model object with [add_criterion()], should it be recalculated. Default is
13+
#' `FALSE`.
14+
#'
15+
#' @return The estimate of the log marginal likelihood.
16+
#' @export
17+
#'
18+
#' @references Meng, X.-L., & Wong, W. H. (1996). Simulating ratios of
19+
#' normalizing constants via a simple identity: A theoretical exploration.
20+
#' *Statistical Sinica, 6*(4), 831-860.
21+
#' <https://www.jstor.org/stable/24306045>
22+
#'
23+
#' @examplesIf measr_examples()
24+
#' model_spec <- dcm_specify(qmatrix = dcmdata::mdm_qmatrix,
25+
#' identifier = "item")
26+
#' model <- dcm_estimate(dcm_spec = model_spec, data = dcmdata::mdm_data,
27+
#' identifier = "respondent", method = "mcmc",
28+
#' seed = 63277)
29+
#'
30+
#' log_mll(model)
31+
log_mll <- S7::new_generic(
32+
"log_mll",
33+
"x",
34+
function(x, ..., force = FALSE) {
35+
S7::S7_dispatch()
36+
}
37+
)
38+
39+
S7::method(log_mll, measrdcm) <- function(x, force = FALSE) {
40+
if (!rlang::is_empty(x@criteria$log_mll) && !force) {
41+
return(x@criteria$log_mll)
42+
}
43+
44+
if (!(S7::S7_inherits(x@method, mcmc) && S7::S7_inherits(x@backend, rstan))) {
45+
cli::cli_abort(
46+
glue::glue(
47+
"{{.arg {rlang::caller_arg(x)}}} must be a model estimated with ",
48+
"{{.code method = \"mcmc\"}} and {{.code backend = \"rstan\"}} to ",
49+
"calculate the marginal likelihood"
50+
)
51+
)
52+
}
53+
54+
log_marg_lik <- bridgesampling::bridge_sampler(x@model, silent = TRUE)
55+
log_marg_lik$logml
56+
}
57+
58+
#' Bayes factor for model comparisons
59+
#'
60+
#' Calculate the Bayes factor for model comparisons, which represents the
61+
#' posterior odds of the null hypothesis when the prior probability of the null
62+
#' model is 0.5 (Jeffreys, 1935; Kass & Raftery, 1995).
63+
#' Consistent with the Bayesian reporting guidelines from Kruschke (2021), we
64+
#' calculate the posterior probability of the null model for a variety of prior
65+
#' probabilities, in addition to the Bayes factor.
66+
#'
67+
#' @param x A [measrdcm][dcm_estimate()] object.
68+
#' @param ... Additional [measrdcm][dcm_estimate()] to be compared to `x`.
69+
#' @param model_names Names given to each provided model in the comparison
70+
#' output. If `NULL` (the default), the names will be parsed from the names of
71+
#' the objects passed for comparison.
72+
#' @param prior_prob A numeric vector of prior probabilities for the null model
73+
#' used to calculate the posterior probability of the null model relative to
74+
#' alternative model. See details for more information.
75+
#'
76+
#' @details
77+
#' Bayes factors will be calculated for all possible pairwise comparisons
78+
#' between the models provided to `x` and `...`. In each comparison, one model
79+
#' is identified as the null model, and the other is the alternative. This
80+
#' distinction is not terribly meaningful from a calculation standpoint, as the
81+
#' probabilities for the alternative model are simply 1 minus the null
82+
#' probabilities. If you want particular models to be labeled as the "null", the
83+
#' determination is made by the order the models are sent to the function. That
84+
#' is, `x` will always be the null model. The first model included in `...` will
85+
#' be the alternative model when compared to `x` and the null model when
86+
#' compared to all other models included in `...`. Similarly, the second model
87+
#' included in `...` will be the alternative model when compared to `x` and the
88+
#' first model included in `...` and the null model in all other comparisons.
89+
#'
90+
#' `prior_prob` is used to specify a vector of possible prior probabilities for
91+
#' the null model. These are used in conjunction with the Bayes factor to
92+
#' determine the posterior model probability for the null model, relative to the
93+
#' alternative model. The posterior probability for the alternative model can
94+
#' be calculated as 1 minus the null model's posterior probability. You may
95+
#' specify a specific prior probability, or specify a range of possibilities to
96+
#' construct a graph similar to Kruschke's (2021) Figure 1. These probabilities
97+
#' can be interpreted as, "If the prior probability is \{`prior_prob_null`\},
98+
#' then the posterior is \{`posterior_prob_null`\}" (or 1 minus for the
99+
#' alternative model).
100+
#'
101+
#' @concept Bayes
102+
#'
103+
#' @return A [tibble][tibble::tibble-package] with one row per model comparison
104+
#' and four columns.
105+
#' * `null_model`: The null model in the comparison.
106+
#' * `alt_model`: The alternative model in the comparison.
107+
#' * `bf`: The estimated Bayes factor.
108+
#' * `posterior_probs`: A nested list column, where element element is a
109+
#' tibble with two columns:
110+
#' * `prior_prob_null`: The prior probability that the null model is
111+
#' correct.
112+
#' * `posterior_prob_null`: The posterior probability that the null model is
113+
#' correct.
114+
#'
115+
#' The list column can be unnested with [tidyr::unnest()] (see examples). If
116+
#' `prior_prob` is `NULL`, the `posterior_probs` column is excluded from
117+
#' the returned object.
118+
#' @export
119+
#'
120+
#' @references Jeffreys, H. (1935). Some tests of significance, treated by the
121+
#' theory of probability. *Mathematical Proceedings of the Cambridge
122+
#' Philosophical Society, 31*(2), 203-222. \doi{10.1017/S030500410001330X}
123+
#' @references Kass, R. E., & Raftery, A. E. (1995). Bayes factors.
124+
#' *Journal of the American Statistical Association, 90*(430), 773-795.
125+
#' \doi{10.1080/01621459.1995.10476572}
126+
#' @references Kruschke, J. K. (2021). Bayesian analysis reporting guidelines.
127+
#' *Nature, 5*, 1282-1291. \doi{10.1038/s41562-021-01177-7}
128+
#'
129+
#' @examplesIf measr_examples()
130+
#' mdm_dina <- dcm_estimate(
131+
#' dcm_specify(dcmdata::mdm_qmatrix, identifier = "item",
132+
#' measurement_model = dina()),
133+
#' data = dcmdata::mdm_data, missing = NA, identifier = "respondent",
134+
#' method = "mcmc", seed = 63277, backend = "rstan",
135+
#' iter = 700, warmup = 500, chains = 2, refresh = 0
136+
#' )
137+
#'
138+
#' mdm_dino <- dcm_estimate(
139+
#' dcm_specify(dcmdata::mdm_qmatrix, identifier = "item",
140+
#' measurement_model = dino()),
141+
#' data = dcmdata::mdm_data, missing = NA, identifier = "respondent",
142+
#' method = "mcmc", seed = 63277, backend = "rstan",
143+
#' iter = 700, warmup = 500, chains = 2, refresh = 0
144+
#' )
145+
#'
146+
#' bf <- bayes_factor(mdm_dina, mdm_dino)
147+
#' bf
148+
#'
149+
#' tidyr::unnest(bf, "posterior_probs")
150+
bayes_factor <- S7::new_generic(
151+
"bayes_factor",
152+
"x",
153+
function(x, ..., model_names = NULL, prior_prob = seq(.02, .98, by = .02)) {
154+
for (i in seq_along(prior_prob)) {
155+
check_number_decimal(prior_prob[i], min = 0, max = 1, arg = "prior_prob")
156+
}
157+
158+
S7::S7_dispatch()
159+
}
160+
)
161+
162+
S7::method(bayes_factor, measrdcm) <- function(
163+
x,
164+
...,
165+
model_names = NULL,
166+
prior_prob = seq(.02, .98, by = .02)
167+
) {
168+
dots <- rlang::dots_list(..., .named = TRUE)
169+
dots_check <- vapply(dots, S7::S7_inherits, logical(1), class = measrdcm)
170+
if (!all(dots_check)) {
171+
msg <- paste(
172+
"{.arg {cli::cli_vec(names(dots)[!dots_check])}} must",
173+
"{?be a/be a/all be} {.cls measrdcm} object{?s}"
174+
)
175+
cli::cli_abort(msg)
176+
}
177+
all_models <- c(list(x), dots)
178+
179+
if (is.null(model_names)) {
180+
model_names <- c(rlang::caller_arg(x), names(dots))
181+
} else if (length(model_names) != length(all_models)) {
182+
rdcmchecks::abort_bad_argument(
183+
arg = "model_names",
184+
must = glue::glue(
185+
"be of length {length(all_models)}, ",
186+
"the same as the number of models provided"
187+
),
188+
not = length(model_names)
189+
)
190+
}
191+
192+
all_models <- rlang::set_names(all_models, model_names)
193+
bf <- combn(model_names, m = 2) |>
194+
t() |>
195+
as.data.frame() |>
196+
tibble::as_tibble() |>
197+
dplyr::rowwise() |>
198+
dplyr::mutate(
199+
mod1 = all_models[.data$V1],
200+
mod2 = all_models[.data$V2],
201+
bf = calc_bf(.data$mod1, .data$mod2)
202+
) |>
203+
dplyr::ungroup() |>
204+
dplyr::select(null_model = "V1", alt_model = "V2", "bf")
205+
206+
if (!is.null(prior_prob)) {
207+
bf <- dplyr::mutate(
208+
bf,
209+
posterior_probs = mapply(
210+
calc_model_probabilities,
211+
all_models[.data$null_model],
212+
all_models[.data$alt_model],
213+
MoreArgs = list(prior_prob = prior_prob),
214+
SIMPLIFY = FALSE,
215+
USE.NAMES = FALSE
216+
)
217+
)
218+
}
219+
220+
bf
221+
}
222+
223+
calc_bf <- function(x, y) {
224+
log_marg_lik1 <- log_mll(x)
225+
log_marg_lik2 <- log_mll(y)
226+
227+
exp(log_marg_lik1 - log_marg_lik2)
228+
}
229+
230+
calc_model_probabilities <- function(x, y, prior_prob) {
231+
tibble::tibble(
232+
model_1 = log_mll(x),
233+
model_2 = log_mll(y),
234+
prior_prob = prior_prob
235+
) |>
236+
dplyr::mutate(
237+
log_diff = (.data$model_1 + log(.data$prior_prob)) -
238+
(.data$model_2 + log(1 - .data$prior_prob)),
239+
posterior_prob = exp(.data$log_diff) / (1 + exp(.data$log_diff))
240+
) |>
241+
dplyr::select(
242+
prior_prob_null = "prior_prob",
243+
posterior_prob_null = "posterior_prob"
244+
)
245+
}

R/zzz-methods-yens-q3.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@
6161
#' \doi{10.1177/014662168400800201}
6262
#'
6363
#' @examplesIf measr_examples()
64-
#' # example code
6564
#' model_spec <- dcm_specify(qmatrix = dcmdata::mdm_qmatrix,
6665
#' identifier = "item")
6766
#' model <- dcm_estimate(dcm_spec = model_spec, data = dcmdata::mdm_data,

0 commit comments

Comments
 (0)