diff --git a/NAMESPACE b/NAMESPACE index 7fe559c6..614fe4e7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -144,6 +144,7 @@ export(ppc_km_overlay_grouped) export(ppc_loo_intervals) export(ppc_loo_pit) export(ppc_loo_pit_data) +export(ppc_loo_pit_ecdf) export(ppc_loo_pit_overlay) export(ppc_loo_pit_qq) export(ppc_loo_ribbon) diff --git a/NEWS.md b/NEWS.md index ddd210d9..83d34833 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # bayesplot (development version) +* Added `ppc_loo_pit_ecdf()` by @TeemuSailynoja + # bayesplot 1.12.0 * Expand checking workflows to more platforms by @andrjohns (#324) diff --git a/R/ppc-loo.R b/R/ppc-loo.R index b81d4ec3..681d8fc4 100644 --- a/R/ppc-loo.R +++ b/R/ppc-loo.R @@ -15,9 +15,9 @@ #' @param psis_object If using **loo** version `2.0.0` or greater, an #' object returned by the `psis()` function (or by the `loo()` function #' with argument `save_psis` set to `TRUE`). -#' @param alpha,size,fatten,linewidth Arguments passed to code geoms to control plot -#' aesthetics. For `ppc_loo_pit_qq()` and `ppc_loo_pit_overlay()`, `size` and -#' `alpha` are passed to [ggplot2::geom_point()] and +#' @param alpha,size,fatten,linewidth Arguments passed to code geoms to control +#' plot aesthetics. For `ppc_loo_pit_qq()` and `ppc_loo_pit_overlay()`,`size` +#' and `alpha` are passed to [ggplot2::geom_point()] and #' [ggplot2::geom_density()], respectively. For `ppc_loo_intervals()`, `size` #' `linewidth` and `fatten` are passed to [ggplot2::geom_pointrange()]. For #' `ppc_loo_ribbon()`, `alpha` and `size` are passed to @@ -27,7 +27,7 @@ #' #' @section Plot Descriptions: #' \describe{ -#' \item{`ppc_loo_pit_overlay()`, `ppc_loo_pit_qq()`}{ +#' \item{`ppc_loo_pit_overlay()`, `ppc_loo_pit_qq()`, `ppc_loo_pit_ecdf()`}{ #' The calibration of marginal predictions can be assessed using probability #' integral transformation (PIT) checks. LOO improves the check by avoiding the #' double use of data. See the section on marginal predictive checks in Gelman @@ -52,6 +52,14 @@ #' we have found that the overlaid density plot (`ppc_loo_pit_overlay()`) #' function will provide a clearer picture of calibration problems than the #' Q-Q plot. +#' +#' The `ppc_loo_pit_ecdf()` function visualizes the empirical cumulative +#' distribution function (ECDF) of the LOO PITs overlaid with simultaneous +#' confidence intervals for a standard uniform sample. For large samples, +#' these confidence intervals are visually very narrow. Setting the +#' `plot_diff` argument to `TRUE` transforms the plot to display the +#' difference of the ECDF and the theoretical expectation, which can aid in +#' the visual assessment of calibration. #' } #' \item{`ppc_loo_intervals()`, `ppc_loo_ribbon()`}{ #' Similar to [ppc_intervals()] and [ppc_ribbon()] but the intervals are for @@ -65,7 +73,6 @@ #' @template reference-loo #' #' @examples -#' #' \dontrun{ #' library(rstanarm) #' library(loo) @@ -73,12 +80,12 @@ #' head(radon) #' fit <- stan_lmer( #' log_radon ~ floor + log_uranium + floor:log_uranium -#' + (1 + floor | county), +#' + (1 + floor | county), #' data = radon, #' iter = 100, #' chains = 2, #' cores = 2 -#' ) +#' ) #' y <- radon$log_radon #' yrep <- posterior_predict(fit) #' @@ -93,6 +100,12 @@ #' ppc_loo_pit_qq(y, yrep, lw = lw) #' ppc_loo_pit_qq(y, yrep, lw = lw, compare = "normal") #' +#' # predictive calibration check using LOO probability integral transform +#' ppc_loo_pit_ecdf(y, yrep, lw) +#' +#' # With `plot_diff = TRUE` it is easier to assess the calibration. +#' ppc_loo_pit_ecdf(y, yrep, lw, plot_diff = TRUE) +#' #' # can use the psis object instead of lw #' ppc_loo_pit_qq(y, yrep, psis_object = psis1) #' @@ -101,18 +114,21 @@ #' ppc_loo_intervals(y, yrep, psis_object = psis1, subset = keep_obs) #' #' color_scheme_set("gray") -#' ppc_loo_intervals(y, yrep, psis_object = psis1, subset = keep_obs, -#' order = "median") +#' ppc_loo_intervals(y, yrep, +#' psis_object = psis1, subset = keep_obs, +#' order = "median" +#' ) #' } #' NULL #' @rdname PPC-loo #' @export -#' @param pit For `ppc_loo_pit_overlay()` and `ppc_loo_pit_qq()`, optionally a -#' vector of precomputed PIT values that can be specified instead of `y`, -#' `yrep`, and `lw` (these are all ignored if `pit` is specified). If not -#' specified the PIT values are computed internally before plotting. +#' @param pit For `ppc_loo_pit_overlay()`, `ppc_loo_pit_qq()`, and +#' `ppc_loo_pit_ecdf()` optionally a vector of precomputed PIT values that +#' can be specified instead of `y`, `yrep`, and `lw` (these are all ignored +#' if `pit` is specified). If not specified the PIT values are computed +#' internally before plotting. #' @param samples For `ppc_loo_pit_overlay()`, the number of data sets (each #' the same size as `y`) to simulate from the standard uniform #' distribution. The default is 100. The density estimate of each dataset is @@ -182,31 +198,34 @@ ppc_loo_pit_overlay <- function(y, ) } - message(paste("NOTE: The kernel density estimate assumes continuous observations", - "and is not optimal for discrete observations.")) + message(paste( + "NOTE: The kernel density estimate assumes continuous observations", + "and is not optimal for discrete observations." + )) if (boundary_correction) { p <- ggplot(data) + aes(x = .data$x, y = .data$value) + geom_line( - aes(group = .data$rep_id, color = "yrep"), + aes(group = .data$rep_id, color = "yrep"), data = function(x) dplyr::filter(x, !.data$is_y), alpha = alpha, linewidth = size, - na.rm = TRUE) + + na.rm = TRUE + ) + geom_line( aes(color = "y"), data = function(x) dplyr::filter(x, .data$is_y), - linewidth = 1, + linewidth = 1, lineend = "round", - na.rm = TRUE) + + na.rm = TRUE + ) + scale_x_continuous( limits = c(0, 1), expand = expansion(0, 0.01), breaks = seq(0, 1, by = 0.25), labels = c("0", "0.25", "0.5", "0.75", "1") ) - } else { p <- ggplot(data) + aes(x = .data$value) + @@ -222,7 +241,8 @@ ppc_loo_pit_overlay <- function(y, adjust = adjust, kernel = kernel, n = n_dens, - na.rm = TRUE) + + na.rm = TRUE + ) + stat_density( aes(color = "y"), data = function(x) dplyr::filter(x, .data$is_y), @@ -235,7 +255,8 @@ ppc_loo_pit_overlay <- function(y, adjust = adjust, kernel = kernel, n = n_dens, - na.rm = TRUE) + + na.rm = TRUE + ) + scale_x_continuous( limits = c(0.05, 0.95), expand = expansion(0, 0), @@ -344,19 +365,108 @@ ppc_loo_pit_qq <- function(y, distribution = theoretical, color = get_color("m"), size = size, - alpha = alpha) + + alpha = alpha + ) + geom_abline( linetype = 2, - color = "black") + + color = "black" + ) + bayesplot_theme_get() + labs(x = x_lab, y = y_lab) if (compare == "uniform") { - qq + lims(x=c(0,1), y=c(0,1)) + qq + lims(x = c(0, 1), y = c(0, 1)) } else { qq } } +#' @rdname PPC-loo +#' @export +#' @param K For `ppc_loo_pit_ecdf()` an optional integer defining the number +#' of equally spaced evaluation points for the PIT-ECDF. Reducing K when +#' using `interpolate_adj = FALSE` makes computing the confidence bands +#' faster. If `pit` is supplied, defaults to `length(pit)`, otherwise +#' `yrep` determines the maximum accuracy of the estimated PIT values and +#' `K` is set to `min(nrow(yrep) + 1, 1000)`. +#' @param plot_diff For `ppc_loo_pit_ecdf()`, a boolean defining whether to +#' plot the difference between the observed PIT-ECDF and the theoretical +#' expectation for uniform PIT values rather than plotting the regular ECDF. +#' The default is `FALSE`, but for large samples we recommend setting +#' `plot_diff = TRUE` to better use the plot area. +#' @param interpolate_adj For `ppc_loo_pit_ecdf()`, a boolean defining if the +#' simultaneous confidence bands should be interpolated based on precomputed +#' values rather than computed exactly. Computing the bands may be +#' computationally intensive and the approximation gives a fast method for +#' assessing the ECDF trajectory. The default is to use interpolation if `K` +#' is greater than 200. +ppc_loo_pit_ecdf <- function(y, + yrep, + lw = NULL, + ..., + psis_object = NULL, + pit = NULL, + K = NULL, + prob = .99, + plot_diff = FALSE, + interpolate_adj = NULL) { + check_ignored_arguments(...) + + if (!is.null(pit)) { + inform("'pit' specified so ignoring 'y','yrep','lw' if specified.") + pit <- validate_pit(pit) + if (is.null(K)) { + K <- length(pit) + } + } else { + suggested_package("rstantools") + y <- validate_y(y) + yrep <- validate_predictions(yrep, length(y)) + lw <- .get_lw(lw, psis_object) + stopifnot(identical(dim(yrep), dim(lw))) + pit <- pmin(1, rstantools::loo_pit(object = yrep, y = y, lw = lw)) + if (is.null(K)) { + K <- min(nrow(yrep) + 1, 1000) + } + } + + n_obs <- length(pit) + gamma <- adjust_gamma( + N = n_obs, + K = K, + prob = prob, + interpolate_adj = interpolate_adj + ) + lims <- ecdf_intervals(gamma = gamma, N = n_obs, K = K) + ggplot() + + aes( + x = seq(0, 1, length.out = K), + y = ecdf(pit)(seq(0, 1, length.out = K)) - + (plot_diff == TRUE) * seq(0, 1, length.out = K), + color = "y" + ) + + geom_step(show.legend = FALSE) + + geom_step( + aes( + y = lims$upper[-1] / n_obs - + (plot_diff == TRUE) * seq(0, 1, length.out = K), + color = "yrep" + ), + linetype = 2, show.legend = FALSE + ) + + geom_step( + aes( + y = lims$lower[-1] / n_obs - + (plot_diff == TRUE) * seq(0, 1, length.out = K), + color = "yrep" + ), + linetype = 2, show.legend = FALSE + ) + + labs(y = ifelse(plot_diff, "ECDF difference", "ECDF"), x = "LOO PIT") + + yaxis_ticks(FALSE) + + scale_color_ppc() + + bayesplot_theme_get() +} + #' @rdname PPC-loo #' @export @@ -385,7 +495,6 @@ ppc_loo_pit <- #' @rdname PPC-loo #' @export -#' @template args-prob-prob_outer #' @param intervals For `ppc_loo_intervals()` and `ppc_loo_ribbon()`, optionally #' a matrix of pre-computed LOO predictive intervals that can be specified #' instead of `yrep` (ignored if `intervals` is specified). If not specified @@ -398,6 +507,10 @@ ppc_loo_pit <- #' the plotted intervals. The default (`"index"`) is to plot them in the #' order of the observations. The alternative (`"median"`) arranges them #' by median value from smallest (left) to largest (right). +#' @param prob,prob_outer Values between `0` and `1` indicating the desired +#' probability mass to include in the inner and outer intervals. The defaults +#' are `prob=0.5` and `prob_outer=0.9` for `ppc_loo_intervals()` and +#' `prob = 0.99` for `ppc_loo_pit_ecdf()`. #' @param subset For `ppc_loo_intervals()` and `ppc_loo_ribbon()`, an optional #' integer vector indicating which observations in `y` (and `yrep`) to #' include. Dropping observations from `y` and `yrep` manually before passing @@ -421,7 +534,6 @@ ppc_loo_intervals <- fatten = 2.5, linewidth = 1, order = c("index", "median")) { - check_ignored_arguments(...) y <- validate_y(y) order_by_median <- match.arg(order) == "median" @@ -440,7 +552,7 @@ ppc_loo_intervals <- if (!is.null(subset)) { stopifnot(length(y) >= length(subset)) y <- y[subset] - yrep <- yrep[, subset, drop=FALSE] + yrep <- yrep[, subset, drop = FALSE] psis_object <- .psis_subset(psis_object, subset) } probs <- sort(c(prob, prob_outer)) @@ -523,7 +635,7 @@ ppc_loo_ribbon <- "'yrep', 'psis_object', 'subset', if specified." )) if (ncol(intervals) == 3) { - intervals <- cbind(intervals[, 1], intervals, intervals[, 3]) + intervals <- cbind(intervals[, 1], intervals, intervals[, 3]) } } else { suggested_package("loo", min_version = "2.0.0") @@ -531,7 +643,7 @@ ppc_loo_ribbon <- if (!is.null(subset)) { stopifnot(length(y) >= length(subset)) y <- y[subset] - yrep <- yrep[, subset, drop=FALSE] + yrep <- yrep[, subset, drop = FALSE] psis_object <- .psis_subset(psis_object, subset) } probs <- sort(c(prob, prob_outer)) @@ -570,7 +682,7 @@ ppc_loo_ribbon <- geom_line( aes(y = .data$y_obs, color = "y"), linewidth = 0.5, - alpha = 2/3 + alpha = 2 / 3 ) + scale_color_ppc() + scale_fill_ppc(values = c(NA, get_color("l"))) + @@ -589,10 +701,11 @@ ppc_loo_ribbon <- y_obs = y, x = x, ll = intervals[, 1], - l = intervals[, 2], - m = intervals[, 3], - h = intervals[, 4], - hh = intervals[, 5]) + l = intervals[, 2], + m = intervals[, 3], + h = intervals[, 4], + hh = intervals[, 5] + ) } # subset a psis_object without breaking it @@ -602,7 +715,7 @@ ppc_loo_ribbon <- if (length(subset) > dim(psis_object)[2]) { abort("'subset' has too many elements.") } - psis_object$log_weights <- psis_object$log_weights[, subset, drop=FALSE] + psis_object$log_weights <- psis_object$log_weights[, subset, drop = FALSE] psis_object$diagnostics$pareto_k <- psis_object$diagnostics$pareto_k[subset] psis_object$diagnostics$n_eff <- psis_object$diagnostics$n_eff[subset] @@ -618,29 +731,28 @@ ppc_loo_ribbon <- # convolution with a Gaussian filter. # Based on scipy.signal.gaussian formula -.gaussian <- function(N, bw){ - n <- seq(0, N -1) - (N - 1)/2 - sigma = 2 * bw * bw - w = exp(-n^2 / sigma) +.gaussian <- function(N, bw) { + n <- seq(0, N - 1) - (N - 1) / 2 + sigma <- 2 * bw * bw + w <- exp(-n^2 / sigma) return(w) - } .linear_convolution <- function(x, bw, grid_counts, grid_breaks, - grid_len){ + grid_len) { # 1-D Gaussian estimation via # convolution of a Gaussian filter and the binned relative freqs - bin_width <- grid_breaks[2] - grid_breaks[1] + bin_width <- grid_breaks[2] - grid_breaks[1] f <- grid_counts / bin_width / length(x) bw <- bw / bin_width # number of data points to generate for gaussian filter - gauss_n <- as.integer(bw * 2 *pi) - if (gauss_n == 0){ - gauss_n = 1 + gauss_n <- as.integer(bw * 2 * pi) + if (gauss_n == 0) { + gauss_n <- 1 } # Generate Gaussian filter vector @@ -648,39 +760,44 @@ ppc_loo_ribbon <- npad <- as.integer(grid_len / 5) # Reflection method (i.e. get first N and last N points to pad vector) - f <- c(rev(f[1:(npad)]), - f, - rev(f)[(grid_len - npad):(grid_len - 1)]) + f <- c( + rev(f[1:(npad)]), + f, + rev(f)[(grid_len - npad):(grid_len - 1)] + ) # Convolution: Gaussian filter + reflection method (pading) works as an # averaging moving window based on a Gaussian density which takes care # of the density boundary values near 0 and 1. bc_pvals <- stats::filter(f, - kernel, - method = 'convolution', - sides = 2)[(npad + 1):(npad + grid_len)] + kernel, + method = "convolution", + sides = 2 + )[(npad + 1):(npad + grid_len)] bc_pvals / (bw * (2 * pi)^0.5) } .kde_correction <- function(x, bw, - grid_len){ + grid_len) { # Generate boundary corrected values via a linear convolution using a # 1-D Gaussian window filter. This method uses the "reflection method" # to estimate these pvalues and helps speed up the code - if (any(is.infinite(x))){ - warning(paste("Ignored", sum(is.infinite(x)), - "Non-finite PIT values are invalid for KDE boundary correction method")) + if (any(is.infinite(x))) { + warning(paste( + "Ignored", sum(is.infinite(x)), + "Non-finite PIT values are invalid for KDE boundary correction method" + )) x <- x[is.finite(x)] } - if (grid_len < 100){ + if (grid_len < 100) { grid_len <- 100 } # Get relative frequency boundaries and counts for input vector - bins <- seq(from= min(x), to = max(x), length.out = grid_len + 1) + bins <- seq(from = min(x), to = max(x), length.out = grid_len + 1) hist_obj <- graphics::hist(x, breaks = bins, plot = FALSE) grid_breaks <- hist_obj$breaks grid_counts <- hist_obj$counts @@ -694,10 +811,10 @@ ppc_loo_ribbon <- # Generate vector of x-axis values for plotting based on binned relative freqs n_breaks <- length(grid_breaks) - xs <- (grid_breaks[2:n_breaks] + grid_breaks[1:(n_breaks - 1)]) / 2 + xs <- (grid_breaks[2:n_breaks] + grid_breaks[1:(n_breaks - 1)]) / 2 - first_nonNA <- utils::head(which(!is.na(bc_pvals)),1) - last_nonNA <- utils::tail(which(!is.na(bc_pvals)),1) + first_nonNA <- utils::head(which(!is.na(bc_pvals)), 1) + last_nonNA <- utils::tail(which(!is.na(bc_pvals)), 1) bc_pvals[1:first_nonNA] <- bc_pvals[first_nonNA] bc_pvals[last_nonNA:length(bc_pvals)] <- bc_pvals[last_nonNA] @@ -706,23 +823,25 @@ ppc_loo_ribbon <- # Wrapper function to generate runif reference lines based on # .kde_correction() -.ref_kde_correction <- function(unifs, bw, grid_len){ - +.ref_kde_correction <- function(unifs, bw, grid_len) { # Allocate memory - idx <- seq(from = 1, - to = ncol(unifs)*nrow(unifs) + ncol(unifs), - by = ncol(unifs)) - idx <- c(idx, ncol(unifs)*nrow(unifs)) - xs <- rep(0, ncol(unifs)*nrow(unifs)) + idx <- seq( + from = 1, + to = ncol(unifs) * nrow(unifs) + ncol(unifs), + by = ncol(unifs) + ) + idx <- c(idx, ncol(unifs) * nrow(unifs)) + xs <- rep(0, ncol(unifs) * nrow(unifs)) bc_mat <- matrix(0, nrow(unifs), ncol(unifs)) # Generate boundary corrected reference values - for (i in 1:nrow(unifs)){ - bc_list <- .kde_correction(unifs[i,], - bw = bw, - grid_len = grid_len) - bc_mat[i,] <- bc_list$bc_pvals - xs[idx[i]:(idx[i+1]-1)] <- bc_list$xs + for (i in 1:nrow(unifs)) { + bc_list <- .kde_correction(unifs[i, ], + bw = bw, + grid_len = grid_len + ) + bc_mat[i, ] <- bc_list$bc_pvals + xs[idx[i]:(idx[i + 1] - 1)] <- bc_list$xs } list(xs = xs, unifs = bc_mat) diff --git a/man/PPC-loo.Rd b/man/PPC-loo.Rd index 58e6a7fd..10f29d8c 100644 --- a/man/PPC-loo.Rd +++ b/man/PPC-loo.Rd @@ -5,6 +5,7 @@ \alias{ppc_loo_pit_overlay} \alias{ppc_loo_pit_data} \alias{ppc_loo_pit_qq} +\alias{ppc_loo_pit_ecdf} \alias{ppc_loo_pit} \alias{ppc_loo_intervals} \alias{ppc_loo_ribbon} @@ -54,6 +55,19 @@ ppc_loo_pit_qq( alpha = 1 ) +ppc_loo_pit_ecdf( + y, + yrep, + lw = NULL, + ..., + psis_object = NULL, + pit = NULL, + K = NULL, + prob = 0.99, + plot_diff = FALSE, + interpolate_adj = NULL +) + ppc_loo_pit( y, yrep, @@ -116,10 +130,11 @@ the \strong{Examples} section, below. If \code{lw} is not specified then object returned by the \code{psis()} function (or by the \code{loo()} function with argument \code{save_psis} set to \code{TRUE}).} -\item{pit}{For \code{ppc_loo_pit_overlay()} and \code{ppc_loo_pit_qq()}, optionally a -vector of precomputed PIT values that can be specified instead of \code{y}, -\code{yrep}, and \code{lw} (these are all ignored if \code{pit} is specified). If not -specified the PIT values are computed internally before plotting.} +\item{pit}{For \code{ppc_loo_pit_overlay()}, \code{ppc_loo_pit_qq()}, and +\code{ppc_loo_pit_ecdf()} optionally a vector of precomputed PIT values that +can be specified instead of \code{y}, \code{yrep}, and \code{lw} (these are all ignored +if \code{pit} is specified). If not specified the PIT values are computed +internally before plotting.} \item{samples}{For \code{ppc_loo_pit_overlay()}, the number of data sets (each the same size as \code{y}) to simulate from the standard uniform @@ -127,9 +142,9 @@ distribution. The default is 100. The density estimate of each dataset is plotted as a thin line in the plot, with the density estimate of the LOO PITs overlaid as a thicker dark line.} -\item{alpha, size, fatten, linewidth}{Arguments passed to code geoms to control plot -aesthetics. For \code{ppc_loo_pit_qq()} and \code{ppc_loo_pit_overlay()}, \code{size} and -\code{alpha} are passed to \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}} and +\item{alpha, size, fatten, linewidth}{Arguments passed to code geoms to control +plot aesthetics. For \code{ppc_loo_pit_qq()} and \code{ppc_loo_pit_overlay()},\code{size} +and \code{alpha} are passed to \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}} and \code{\link[ggplot2:geom_density]{ggplot2::geom_density()}}, respectively. For \code{ppc_loo_intervals()}, \code{size} \code{linewidth} and \code{fatten} are passed to \code{\link[ggplot2:geom_linerange]{ggplot2::geom_pointrange()}}. For \code{ppc_loo_ribbon()}, \code{alpha} and \code{size} are passed to @@ -160,6 +175,31 @@ compares computed PIT values to the standard uniform distribution. If calculated from the PIT values to the theoretical standard normal quantiles.} +\item{K}{For \code{ppc_loo_pit_ecdf()} an optional integer defining the number +of equally spaced evaluation points for the PIT-ECDF. Reducing K when +using \code{interpolate_adj = FALSE} makes computing the confidence bands +faster. If \code{pit} is supplied, defaults to \code{length(pit)}, otherwise +\code{yrep} determines the maximum accuracy of the estimated PIT values and +\code{K} is set to \code{min(nrow(yrep) + 1, 1000)}.} + +\item{prob, prob_outer}{Values between \code{0} and \code{1} indicating the desired +probability mass to include in the inner and outer intervals. The defaults +are \code{prob=0.5} and \code{prob_outer=0.9} for \code{ppc_loo_intervals()} and +\code{prob = 0.99} for \code{ppc_loo_pit_ecdf()}.} + +\item{plot_diff}{For \code{ppc_loo_pit_ecdf()}, a boolean defining whether to +plot the difference between the observed PIT-ECDF and the theoretical +expectation for uniform PIT values rather than plotting the regular ECDF. +The default is \code{FALSE}, but for large samples we recommend setting +\code{plot_diff = TRUE} to better use the plot area.} + +\item{interpolate_adj}{For \code{ppc_loo_pit_ecdf()}, a boolean defining if the +simultaneous confidence bands should be interpolated based on precomputed +values rather than computed exactly. Computing the bands may be +computationally intensive and the approximation gives a fast method for +assessing the ECDF trajectory. The default is to use interpolation if \code{K} +is greater than 200.} + \item{subset}{For \code{ppc_loo_intervals()} and \code{ppc_loo_ribbon()}, an optional integer vector indicating which observations in \code{y} (and \code{yrep}) to include. Dropping observations from \code{y} and \code{yrep} manually before passing @@ -178,10 +218,6 @@ data points and five columns in the following order: lower outer interval, lower inner interval, median (50\%), upper inner interval and upper outer interval (column names are ignored).} -\item{prob, prob_outer}{Values between \code{0} and \code{1} indicating the desired -probability mass to include in the inner and outer intervals. The defaults -are \code{prob=0.5} and \code{prob_outer=0.9}.} - \item{order}{For \code{ppc_loo_intervals()}, a string indicating how to arrange the plotted intervals. The default (\code{"index"}) is to plot them in the order of the observations. The alternative (\code{"median"}) arranges them @@ -198,7 +234,7 @@ for details. \section{Plot Descriptions}{ \describe{ -\item{\code{ppc_loo_pit_overlay()}, \code{ppc_loo_pit_qq()}}{ +\item{\code{ppc_loo_pit_overlay()}, \code{ppc_loo_pit_qq()}, \code{ppc_loo_pit_ecdf()}}{ The calibration of marginal predictions can be assessed using probability integral transformation (PIT) checks. LOO improves the check by avoiding the double use of data. See the section on marginal predictive checks in Gelman @@ -223,6 +259,14 @@ the (mis)calibration better for the extreme values. However, in most cases we have found that the overlaid density plot (\code{ppc_loo_pit_overlay()}) function will provide a clearer picture of calibration problems than the Q-Q plot. + +The \code{ppc_loo_pit_ecdf()} function visualizes the empirical cumulative +distribution function (ECDF) of the LOO PITs overlaid with simultaneous +confidence intervals for a standard uniform sample. For large samples, +these confidence intervals are visually very narrow. Setting the +\code{plot_diff} argument to \code{TRUE} transforms the plot to display the +difference of the ECDF and the theoretical expectation, which can aid in +the visual assessment of calibration. } \item{\code{ppc_loo_intervals()}, \code{ppc_loo_ribbon()}}{ Similar to \code{\link[=ppc_intervals]{ppc_intervals()}} and \code{\link[=ppc_ribbon]{ppc_ribbon()}} but the intervals are for @@ -232,7 +276,6 @@ the LOO predictive distribution. } \examples{ - \dontrun{ library(rstanarm) library(loo) @@ -240,12 +283,12 @@ library(loo) head(radon) fit <- stan_lmer( log_radon ~ floor + log_uranium + floor:log_uranium - + (1 + floor | county), + + (1 + floor | county), data = radon, iter = 100, chains = 2, cores = 2 - ) +) y <- radon$log_radon yrep <- posterior_predict(fit) @@ -260,6 +303,12 @@ ppc_loo_pit_overlay(y, yrep, lw = lw) ppc_loo_pit_qq(y, yrep, lw = lw) ppc_loo_pit_qq(y, yrep, lw = lw, compare = "normal") +# predictive calibration check using LOO probability integral transform +ppc_loo_pit_ecdf(y, yrep, lw) + +# With `plot_diff = TRUE` it is easier to assess the calibration. +ppc_loo_pit_ecdf(y, yrep, lw, plot_diff = TRUE) + # can use the psis object instead of lw ppc_loo_pit_qq(y, yrep, psis_object = psis1) @@ -268,8 +317,10 @@ keep_obs <- 1:50 ppc_loo_intervals(y, yrep, psis_object = psis1, subset = keep_obs) color_scheme_set("gray") -ppc_loo_intervals(y, yrep, psis_object = psis1, subset = keep_obs, - order = "median") +ppc_loo_intervals(y, yrep, + psis_object = psis1, subset = keep_obs, + order = "median" +) } } diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg new file mode 100644 index 00000000..87a4c0b9 --- /dev/null +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +LOO PIT +ECDF +ppc_loo_pit_ecdf (default) + + diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg new file mode 100644 index 00000000..ac754257 --- /dev/null +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg @@ -0,0 +1,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +-0.1 +0.0 +0.1 + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +LOO PIT +ECDF difference +ppc_loo_pit_ecdf (ecdf difference) + + diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-k.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-k.svg new file mode 100644 index 00000000..d1921d75 --- /dev/null +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-k.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +LOO PIT +ECDF +ppc_loo_pit_ecdf (K) + + diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg new file mode 100644 index 00000000..f65e2ae1 --- /dev/null +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +LOO PIT +ECDF +ppc_loo_pit_ecdf (prob) + + diff --git a/tests/testthat/test-ppc-loo.R b/tests/testthat/test-ppc-loo.R index 47afe69d..d6424cf2 100644 --- a/tests/testthat/test-ppc-loo.R +++ b/tests/testthat/test-ppc-loo.R @@ -67,16 +67,50 @@ test_that("ppc_loo_pit_qq returns ggplot object", { skip_if_not_installed("loo") expect_gg(p1 <- ppc_loo_pit_qq(y, yrep, lw)) expect_gg(p2 <- ppc_loo_pit_qq(y, yrep, psis_object = psis1)) - expect_equal(p1$labels$x, "Uniform") + if ("get_labs" %in% getNamespaceExports("ggplot2")) { + ll1 <- ggplot2::get_labs(p1) + } else { + ll1 <- p1$labels + } + expect_equal(ll1$x, "Uniform") expect_equal(p1$data, p2$data) expect_gg(p3 <- ppc_loo_pit_qq(y, yrep, lw, compare = "normal")) - expect_equal(p3$labels$x, "Normal") + if ("get_labs" %in% getNamespaceExports("ggplot2")) { + ll3 <- ggplot2::get_labs(p3) + } else { + ll3 <- p3$labels + } + expect_equal(ll3$x, "Normal") }) -test_that("ppc_loo_pit functions work when pit specified instead of y,yrep,lw", { +test_that("ppc_loo_pit_ecdf returns a ggplot object", { + skip_if_not_installed("rstanarm") + skip_if_not_installed("loo") + expect_gg(p0 <- ppc_loo_pit_ecdf(y, yrep, lw)) + expect_gg(p1 <- ppc_loo_pit_ecdf(y, yrep, lw)) + expect_gg(p2 <- ppc_loo_pit_ecdf(y, yrep, psis_object = psis1)) + if ("get_labs" %in% getNamespaceExports("ggplot2")) { + ll1 <- ggplot2::get_labs(p1) + } else { + ll1 <- p1$labels + } + expect_equal(ll1$x, "LOO PIT") + expect_equal(ll1$y, "ECDF") + expect_equal(p1$data, p2$data) + expect_gg(p3 <- ppc_loo_pit_ecdf(y, yrep, lw, plot_diff = TRUE)) + if ("get_labs" %in% getNamespaceExports("ggplot2")) { + ll3 <- ggplot2::get_labs(p3) + } else { + ll3 <- p3$labels + } + expect_equal(ll3$y, "ECDF difference") +}) + +test_that("ppc_loo_pit functions work when pit specified instead of y, yrep, and lw", { skip_if_not_installed("rstanarm") skip_if_not_installed("loo") expect_gg(ppc_loo_pit_qq(pit = pits)) + # ppc_loo_pit_qq expect_message( p1 <- ppc_loo_pit_qq(y = y, yrep = yrep, lw = lw, pit = pits), "'pit' specified so ignoring 'y','yrep','lw' if specified" @@ -86,7 +120,18 @@ test_that("ppc_loo_pit functions work when pit specified instead of y,yrep,lw", ) expect_equal(p1$data, p2$data) + # ppc_loo_pit_ecdf + expect_gg(ppc_loo_pit_ecdf(pit = rep(pits, 4))) + expect_message( + p1 <- ppc_loo_pit_ecdf(y = y, yrep = yrep, lw = lw, pit = rep(pits, 4)), + "'pit' specified so ignoring 'y','yrep','lw' if specified" + ) + expect_message( + p2 <- ppc_loo_pit_ecdf(pit = rep(pits, 4)) + ) + expect_equal(p1$data, p2$data) + # ppc_loo_pit_overlay expect_gg(p1 <- ppc_loo_pit_overlay(pit = pits)) expect_message( ppc_loo_pit_overlay(y = y, yrep = yrep, lw = lw, pit = pits), @@ -255,3 +300,42 @@ test_that("ppc_loo_ribbon renders correctly", { vdiffr::expect_doppelganger("ppc_loo_ribbon (subset)", p_custom) }) +test_that("ppc_loo_pit_ecdf renders correctly", { + skip_on_cran() + skip_if_not_installed("vdiffr") + skip_if_not_installed("loo") + skip_on_r_oldrel() + + psis_object <- suppressWarnings(loo::psis(-vdiff_loo_lw)) + p_base <- ppc_loo_pit_ecdf( + vdiff_loo_y, + vdiff_loo_yrep, + psis_object = psis_object + ) + vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (default)", p_base) + + p_custom <- ppc_loo_pit_ecdf( + vdiff_loo_y, + vdiff_loo_yrep, + psis_object = psis_object, + K = 50 + ) + vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (K)", p_custom) + + p_custom <- ppc_loo_pit_ecdf( + vdiff_loo_y, + vdiff_loo_yrep, + psis_object = psis_object, + prob = 0.95 + ) + vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (prob)", p_custom) + + p_custom <- ppc_loo_pit_ecdf( + vdiff_loo_y, + vdiff_loo_yrep, + psis_object = psis_object, + plot_diff = TRUE, + K = 100 + ) + vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (ecdf difference)", p_custom) +})