From 463f46a2ba9949d04cf32d7a6aaefd750764ccfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Wed, 23 Apr 2025 11:13:36 +0300 Subject: [PATCH 1/6] added ppc_loo_pit_ecdf() --- NAMESPACE | 1 + NEWS.md | 2 + R/ppc-loo.R | 254 +++++++++++++++++++++++++++++++++++-------------- man/PPC-loo.Rd | 75 +++++++++++---- 4 files changed, 244 insertions(+), 88 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7fe559c6..614fe4e7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -144,6 +144,7 @@ export(ppc_km_overlay_grouped) export(ppc_loo_intervals) export(ppc_loo_pit) export(ppc_loo_pit_data) +export(ppc_loo_pit_ecdf) export(ppc_loo_pit_overlay) export(ppc_loo_pit_qq) export(ppc_loo_ribbon) diff --git a/NEWS.md b/NEWS.md index ddd210d9..8a9c606e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # bayesplot (development version) +* Added `ppc_loo_pit_ecdf()` bu @TeemuSäilynoja + # bayesplot 1.12.0 * Expand checking workflows to more platforms by @andrjohns (#324) diff --git a/R/ppc-loo.R b/R/ppc-loo.R index b81d4ec3..bcec4775 100644 --- a/R/ppc-loo.R +++ b/R/ppc-loo.R @@ -15,9 +15,9 @@ #' @param psis_object If using **loo** version `2.0.0` or greater, an #' object returned by the `psis()` function (or by the `loo()` function #' with argument `save_psis` set to `TRUE`). -#' @param alpha,size,fatten,linewidth Arguments passed to code geoms to control plot -#' aesthetics. For `ppc_loo_pit_qq()` and `ppc_loo_pit_overlay()`, `size` and -#' `alpha` are passed to [ggplot2::geom_point()] and +#' @param alpha,size,fatten,linewidth Arguments passed to code geoms to control +#' plot aesthetics. For `ppc_loo_pit_qq()` and `ppc_loo_pit_overlay()`,`size` +#' and `alpha` are passed to [ggplot2::geom_point()] and #' [ggplot2::geom_density()], respectively. For `ppc_loo_intervals()`, `size` #' `linewidth` and `fatten` are passed to [ggplot2::geom_pointrange()]. For #' `ppc_loo_ribbon()`, `alpha` and `size` are passed to @@ -65,7 +65,6 @@ #' @template reference-loo #' #' @examples -#' #' \dontrun{ #' library(rstanarm) #' library(loo) @@ -73,12 +72,12 @@ #' head(radon) #' fit <- stan_lmer( #' log_radon ~ floor + log_uranium + floor:log_uranium -#' + (1 + floor | county), +#' + (1 + floor | county), #' data = radon, #' iter = 100, #' chains = 2, #' cores = 2 -#' ) +#' ) #' y <- radon$log_radon #' yrep <- posterior_predict(fit) #' @@ -93,6 +92,12 @@ #' ppc_loo_pit_qq(y, yrep, lw = lw) #' ppc_loo_pit_qq(y, yrep, lw = lw, compare = "normal") #' +#' # predictive calibration check using LOO probability integral transform +#' ppc_loo_pit_ecdf(y, yrep, lw) +#' +#' # With `plot_diff = TRUE` it is easier to assess the calibration. +#' ppc_loo_pit_ecdf(y, yrep, lw, plot_diff = TRUE) +#' #' # can use the psis object instead of lw #' ppc_loo_pit_qq(y, yrep, psis_object = psis1) #' @@ -101,18 +106,21 @@ #' ppc_loo_intervals(y, yrep, psis_object = psis1, subset = keep_obs) #' #' color_scheme_set("gray") -#' ppc_loo_intervals(y, yrep, psis_object = psis1, subset = keep_obs, -#' order = "median") +#' ppc_loo_intervals(y, yrep, +#' psis_object = psis1, subset = keep_obs, +#' order = "median" +#' ) #' } #' NULL #' @rdname PPC-loo #' @export -#' @param pit For `ppc_loo_pit_overlay()` and `ppc_loo_pit_qq()`, optionally a -#' vector of precomputed PIT values that can be specified instead of `y`, -#' `yrep`, and `lw` (these are all ignored if `pit` is specified). If not -#' specified the PIT values are computed internally before plotting. +#' @param pit For `ppc_loo_pit_overlay()`, `ppc_loo_pit_qq()`, and +#' `ppc_loo_pit_ecdf()` optionally a vector of precomputed PIT values that +#' can be specified instead of `y`, `yrep`, and `lw` (these are all ignored +#' if `pit` is specified). If not specified the PIT values are computed +#' internally before plotting. #' @param samples For `ppc_loo_pit_overlay()`, the number of data sets (each #' the same size as `y`) to simulate from the standard uniform #' distribution. The default is 100. The density estimate of each dataset is @@ -182,31 +190,34 @@ ppc_loo_pit_overlay <- function(y, ) } - message(paste("NOTE: The kernel density estimate assumes continuous observations", - "and is not optimal for discrete observations.")) + message(paste( + "NOTE: The kernel density estimate assumes continuous observations", + "and is not optimal for discrete observations." + )) if (boundary_correction) { p <- ggplot(data) + aes(x = .data$x, y = .data$value) + geom_line( - aes(group = .data$rep_id, color = "yrep"), + aes(group = .data$rep_id, color = "yrep"), data = function(x) dplyr::filter(x, !.data$is_y), alpha = alpha, linewidth = size, - na.rm = TRUE) + + na.rm = TRUE + ) + geom_line( aes(color = "y"), data = function(x) dplyr::filter(x, .data$is_y), - linewidth = 1, + linewidth = 1, lineend = "round", - na.rm = TRUE) + + na.rm = TRUE + ) + scale_x_continuous( limits = c(0, 1), expand = expansion(0, 0.01), breaks = seq(0, 1, by = 0.25), labels = c("0", "0.25", "0.5", "0.75", "1") ) - } else { p <- ggplot(data) + aes(x = .data$value) + @@ -222,7 +233,8 @@ ppc_loo_pit_overlay <- function(y, adjust = adjust, kernel = kernel, n = n_dens, - na.rm = TRUE) + + na.rm = TRUE + ) + stat_density( aes(color = "y"), data = function(x) dplyr::filter(x, .data$is_y), @@ -235,7 +247,8 @@ ppc_loo_pit_overlay <- function(y, adjust = adjust, kernel = kernel, n = n_dens, - na.rm = TRUE) + + na.rm = TRUE + ) + scale_x_continuous( limits = c(0.05, 0.95), expand = expansion(0, 0), @@ -344,19 +357,110 @@ ppc_loo_pit_qq <- function(y, distribution = theoretical, color = get_color("m"), size = size, - alpha = alpha) + + alpha = alpha + ) + geom_abline( linetype = 2, - color = "black") + + color = "black" + ) + bayesplot_theme_get() + labs(x = x_lab, y = y_lab) if (compare == "uniform") { - qq + lims(x=c(0,1), y=c(0,1)) + qq + lims(x = c(0, 1), y = c(0, 1)) } else { qq } } +#' @rdname PPC-loo +#' @export +#' @param eval_points For `ppc_loo_pit_ecdf()`, an optional integer defining +#' the number of equally spaced evaluation points for the ECDF. Reducing +#' `eval_poins` when using `interpolate_adj = FALSE` makes computing the +#' confidence bands faster. If `pit` is supplied, defaults to `length(pit)`, +#' otherwise `min(nrow(yrep) + 1, +#' @param prob For `ppc_loo_pit_ecdf()`, the desired simultaneous coverage +#' level of the bands around the ECDF. A value in (0,1). +#' @param plot_diff For `ppc_loo_pit_ecdf()`, a boolean defining whether to +#' plot the difference between the observed PIT-ECDF and the theoretical +#' expectation for uniform PIT values rather than plotting the regular ECDF. +#' The default is `FALSE`, but for large samples we recommend setting +#' `plot_diff = TRUE` to better use the plot area. +#' @param interval_method For `ppc_loo_pit_ecdf()`, a string that can be either +#' "interpolate" or "optimize". If "simulate" (the default), the simultaneous +#' confidence bands are interpolated based on precomputed values rather than +#' solved for the specific combination of `eval_points` and `dim(yrep)`. +#' The default is to use interpolation if `eval_points` is greater than 200. +#' +ppc_loo_pit_ecdf <- function(y, + yrep, + lw = NULL, + ..., + psis_object = NULL, + pit = NULL, + eval_points = NULL, + prob = .99, + plot_diff = FALSE, + interval_method = c("interpolate", "optimize")) { + check_ignored_arguments(...) + + interval_method <- match.arg(interval_method) + if (!is.null(pit)) { + inform("'pit' specified so ignoring 'y','yrep','lw' if specified.") + pit <- validate_pit(pit) + if (is.null(eval_points)) { + eval_points <- length(pit) + } + } else { + suggested_package("rstantools") + y <- validate_y(y) + yrep <- validate_predictions(yrep, length(y)) + lw <- .get_lw(lw, psis_object) + stopifnot(identical(dim(yrep), dim(lw))) + pit <- pmin(1, rstantools::loo_pit(object = yrep, y = y, lw = lw)) + if (is.null(eval_points)) { + eval_points <- min(nrow(yrep) + 1, 1000) + } + } + + n_obs <- length(pit) + gamma <- adjust_gamma( + N = n_obs, + K = eval_points, + prob = prob, + interpolate_adj = interval_method == "interpolate" + ) + lims <- ecdf_intervals(gamma = gamma, N = n_obs, K = eval_points) + ggplot() + + aes( + x = seq(0, 1, length.out = eval_points), + y = ecdf(pit)(seq(0, 1, length.out = eval_points)) - + (plot_diff == TRUE) * seq(0, 1, length.out = eval_points), + color = "y" + ) + + geom_step(show.legend = FALSE) + + geom_step( + aes( + y = lims$upper[-1] / n_obs - + (plot_diff == TRUE) * seq(0, 1, length.out = eval_points), + color = "yrep" + ), + linetype = 2, show.legend = FALSE + ) + + geom_step( + aes( + y = lims$lower[-1] / n_obs - + (plot_diff == TRUE) * seq(0, 1, length.out = eval_points), + color = "yrep" + ), + linetype = 2, show.legend = FALSE + ) + + labs(y = ifelse(plot_diff, "ECDF difference", "ECDF"), x = "LOO PIT") + + yaxis_ticks(FALSE) + + scale_color_ppc() + + bayesplot_theme_get() +} + #' @rdname PPC-loo #' @export @@ -421,7 +525,6 @@ ppc_loo_intervals <- fatten = 2.5, linewidth = 1, order = c("index", "median")) { - check_ignored_arguments(...) y <- validate_y(y) order_by_median <- match.arg(order) == "median" @@ -440,7 +543,7 @@ ppc_loo_intervals <- if (!is.null(subset)) { stopifnot(length(y) >= length(subset)) y <- y[subset] - yrep <- yrep[, subset, drop=FALSE] + yrep <- yrep[, subset, drop = FALSE] psis_object <- .psis_subset(psis_object, subset) } probs <- sort(c(prob, prob_outer)) @@ -523,7 +626,7 @@ ppc_loo_ribbon <- "'yrep', 'psis_object', 'subset', if specified." )) if (ncol(intervals) == 3) { - intervals <- cbind(intervals[, 1], intervals, intervals[, 3]) + intervals <- cbind(intervals[, 1], intervals, intervals[, 3]) } } else { suggested_package("loo", min_version = "2.0.0") @@ -531,7 +634,7 @@ ppc_loo_ribbon <- if (!is.null(subset)) { stopifnot(length(y) >= length(subset)) y <- y[subset] - yrep <- yrep[, subset, drop=FALSE] + yrep <- yrep[, subset, drop = FALSE] psis_object <- .psis_subset(psis_object, subset) } probs <- sort(c(prob, prob_outer)) @@ -570,7 +673,7 @@ ppc_loo_ribbon <- geom_line( aes(y = .data$y_obs, color = "y"), linewidth = 0.5, - alpha = 2/3 + alpha = 2 / 3 ) + scale_color_ppc() + scale_fill_ppc(values = c(NA, get_color("l"))) + @@ -589,10 +692,11 @@ ppc_loo_ribbon <- y_obs = y, x = x, ll = intervals[, 1], - l = intervals[, 2], - m = intervals[, 3], - h = intervals[, 4], - hh = intervals[, 5]) + l = intervals[, 2], + m = intervals[, 3], + h = intervals[, 4], + hh = intervals[, 5] + ) } # subset a psis_object without breaking it @@ -602,7 +706,7 @@ ppc_loo_ribbon <- if (length(subset) > dim(psis_object)[2]) { abort("'subset' has too many elements.") } - psis_object$log_weights <- psis_object$log_weights[, subset, drop=FALSE] + psis_object$log_weights <- psis_object$log_weights[, subset, drop = FALSE] psis_object$diagnostics$pareto_k <- psis_object$diagnostics$pareto_k[subset] psis_object$diagnostics$n_eff <- psis_object$diagnostics$n_eff[subset] @@ -618,29 +722,28 @@ ppc_loo_ribbon <- # convolution with a Gaussian filter. # Based on scipy.signal.gaussian formula -.gaussian <- function(N, bw){ - n <- seq(0, N -1) - (N - 1)/2 - sigma = 2 * bw * bw - w = exp(-n^2 / sigma) +.gaussian <- function(N, bw) { + n <- seq(0, N - 1) - (N - 1) / 2 + sigma <- 2 * bw * bw + w <- exp(-n^2 / sigma) return(w) - } .linear_convolution <- function(x, bw, grid_counts, grid_breaks, - grid_len){ + grid_len) { # 1-D Gaussian estimation via # convolution of a Gaussian filter and the binned relative freqs - bin_width <- grid_breaks[2] - grid_breaks[1] + bin_width <- grid_breaks[2] - grid_breaks[1] f <- grid_counts / bin_width / length(x) bw <- bw / bin_width # number of data points to generate for gaussian filter - gauss_n <- as.integer(bw * 2 *pi) - if (gauss_n == 0){ - gauss_n = 1 + gauss_n <- as.integer(bw * 2 * pi) + if (gauss_n == 0) { + gauss_n <- 1 } # Generate Gaussian filter vector @@ -648,39 +751,44 @@ ppc_loo_ribbon <- npad <- as.integer(grid_len / 5) # Reflection method (i.e. get first N and last N points to pad vector) - f <- c(rev(f[1:(npad)]), - f, - rev(f)[(grid_len - npad):(grid_len - 1)]) + f <- c( + rev(f[1:(npad)]), + f, + rev(f)[(grid_len - npad):(grid_len - 1)] + ) # Convolution: Gaussian filter + reflection method (pading) works as an # averaging moving window based on a Gaussian density which takes care # of the density boundary values near 0 and 1. bc_pvals <- stats::filter(f, - kernel, - method = 'convolution', - sides = 2)[(npad + 1):(npad + grid_len)] + kernel, + method = "convolution", + sides = 2 + )[(npad + 1):(npad + grid_len)] bc_pvals / (bw * (2 * pi)^0.5) } .kde_correction <- function(x, bw, - grid_len){ + grid_len) { # Generate boundary corrected values via a linear convolution using a # 1-D Gaussian window filter. This method uses the "reflection method" # to estimate these pvalues and helps speed up the code - if (any(is.infinite(x))){ - warning(paste("Ignored", sum(is.infinite(x)), - "Non-finite PIT values are invalid for KDE boundary correction method")) + if (any(is.infinite(x))) { + warning(paste( + "Ignored", sum(is.infinite(x)), + "Non-finite PIT values are invalid for KDE boundary correction method" + )) x <- x[is.finite(x)] } - if (grid_len < 100){ + if (grid_len < 100) { grid_len <- 100 } # Get relative frequency boundaries and counts for input vector - bins <- seq(from= min(x), to = max(x), length.out = grid_len + 1) + bins <- seq(from = min(x), to = max(x), length.out = grid_len + 1) hist_obj <- graphics::hist(x, breaks = bins, plot = FALSE) grid_breaks <- hist_obj$breaks grid_counts <- hist_obj$counts @@ -694,10 +802,10 @@ ppc_loo_ribbon <- # Generate vector of x-axis values for plotting based on binned relative freqs n_breaks <- length(grid_breaks) - xs <- (grid_breaks[2:n_breaks] + grid_breaks[1:(n_breaks - 1)]) / 2 + xs <- (grid_breaks[2:n_breaks] + grid_breaks[1:(n_breaks - 1)]) / 2 - first_nonNA <- utils::head(which(!is.na(bc_pvals)),1) - last_nonNA <- utils::tail(which(!is.na(bc_pvals)),1) + first_nonNA <- utils::head(which(!is.na(bc_pvals)), 1) + last_nonNA <- utils::tail(which(!is.na(bc_pvals)), 1) bc_pvals[1:first_nonNA] <- bc_pvals[first_nonNA] bc_pvals[last_nonNA:length(bc_pvals)] <- bc_pvals[last_nonNA] @@ -706,23 +814,25 @@ ppc_loo_ribbon <- # Wrapper function to generate runif reference lines based on # .kde_correction() -.ref_kde_correction <- function(unifs, bw, grid_len){ - +.ref_kde_correction <- function(unifs, bw, grid_len) { # Allocate memory - idx <- seq(from = 1, - to = ncol(unifs)*nrow(unifs) + ncol(unifs), - by = ncol(unifs)) - idx <- c(idx, ncol(unifs)*nrow(unifs)) - xs <- rep(0, ncol(unifs)*nrow(unifs)) + idx <- seq( + from = 1, + to = ncol(unifs) * nrow(unifs) + ncol(unifs), + by = ncol(unifs) + ) + idx <- c(idx, ncol(unifs) * nrow(unifs)) + xs <- rep(0, ncol(unifs) * nrow(unifs)) bc_mat <- matrix(0, nrow(unifs), ncol(unifs)) # Generate boundary corrected reference values - for (i in 1:nrow(unifs)){ - bc_list <- .kde_correction(unifs[i,], - bw = bw, - grid_len = grid_len) - bc_mat[i,] <- bc_list$bc_pvals - xs[idx[i]:(idx[i+1]-1)] <- bc_list$xs + for (i in 1:nrow(unifs)) { + bc_list <- .kde_correction(unifs[i, ], + bw = bw, + grid_len = grid_len + ) + bc_mat[i, ] <- bc_list$bc_pvals + xs[idx[i]:(idx[i + 1] - 1)] <- bc_list$xs } list(xs = xs, unifs = bc_mat) diff --git a/man/PPC-loo.Rd b/man/PPC-loo.Rd index 58e6a7fd..81bb4ec7 100644 --- a/man/PPC-loo.Rd +++ b/man/PPC-loo.Rd @@ -5,6 +5,7 @@ \alias{ppc_loo_pit_overlay} \alias{ppc_loo_pit_data} \alias{ppc_loo_pit_qq} +\alias{ppc_loo_pit_ecdf} \alias{ppc_loo_pit} \alias{ppc_loo_intervals} \alias{ppc_loo_ribbon} @@ -54,6 +55,19 @@ ppc_loo_pit_qq( alpha = 1 ) +ppc_loo_pit_ecdf( + y, + yrep, + lw = NULL, + ..., + psis_object = NULL, + pit = NULL, + eval_points = NULL, + prob = 0.99, + plot_diff = FALSE, + interval_method = c("interpolate", "optimize") +) + ppc_loo_pit( y, yrep, @@ -116,10 +130,11 @@ the \strong{Examples} section, below. If \code{lw} is not specified then object returned by the \code{psis()} function (or by the \code{loo()} function with argument \code{save_psis} set to \code{TRUE}).} -\item{pit}{For \code{ppc_loo_pit_overlay()} and \code{ppc_loo_pit_qq()}, optionally a -vector of precomputed PIT values that can be specified instead of \code{y}, -\code{yrep}, and \code{lw} (these are all ignored if \code{pit} is specified). If not -specified the PIT values are computed internally before plotting.} +\item{pit}{For \code{ppc_loo_pit_overlay()}, \code{ppc_loo_pit_qq()}, and +\code{ppc_loo_pit_ecdf()} optionally a vector of precomputed PIT values that +can be specified instead of \code{y}, \code{yrep}, and \code{lw} (these are all ignored +if \code{pit} is specified). If not specified the PIT values are computed +internally before plotting.} \item{samples}{For \code{ppc_loo_pit_overlay()}, the number of data sets (each the same size as \code{y}) to simulate from the standard uniform @@ -127,9 +142,9 @@ distribution. The default is 100. The density estimate of each dataset is plotted as a thin line in the plot, with the density estimate of the LOO PITs overlaid as a thicker dark line.} -\item{alpha, size, fatten, linewidth}{Arguments passed to code geoms to control plot -aesthetics. For \code{ppc_loo_pit_qq()} and \code{ppc_loo_pit_overlay()}, \code{size} and -\code{alpha} are passed to \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}} and +\item{alpha, size, fatten, linewidth}{Arguments passed to code geoms to control +plot aesthetics. For \code{ppc_loo_pit_qq()} and \code{ppc_loo_pit_overlay()},\code{size} +and \code{alpha} are passed to \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}} and \code{\link[ggplot2:geom_density]{ggplot2::geom_density()}}, respectively. For \code{ppc_loo_intervals()}, \code{size} \code{linewidth} and \code{fatten} are passed to \code{\link[ggplot2:geom_linerange]{ggplot2::geom_pointrange()}}. For \code{ppc_loo_ribbon()}, \code{alpha} and \code{size} are passed to @@ -160,6 +175,27 @@ compares computed PIT values to the standard uniform distribution. If calculated from the PIT values to the theoretical standard normal quantiles.} +\item{eval_points}{For \code{ppc_loo_pit_ecdf()}, an optional integer defining +the number of equally spaced evaluation points for the ECDF. Reducing +\code{eval_poins} when using \code{interpolate_adj = FALSE} makes computing the +confidence bands faster. If \code{pit} is supplied, defaults to \code{length(pit)}, +otherwise `min(nrow(yrep) + 1,} + +\item{prob}{For \code{ppc_loo_pit_ecdf()}, the desired simultaneous coverage +level of the bands around the ECDF. A value in (0,1).} + +\item{plot_diff}{For \code{ppc_loo_pit_ecdf()}, a boolean defining whether to +plot the difference between the observed PIT-ECDF and the theoretical +expectation for uniform PIT values rather than plotting the regular ECDF. +The default is \code{FALSE}, but for large samples we recommend setting +\code{plot_diff = TRUE} to better use the plot area.} + +\item{interval_method}{For \code{ppc_loo_pit_ecdf()}, a string that can be either +"interpolate" or "optimize". If "simulate" (the default), the simultaneous +confidence bands are interpolated based on precomputed values rather than +solved for the specific combination of \code{eval_points} and \code{dim(yrep)}. +The default is to use interpolation if \code{eval_points} is greater than 200.} + \item{subset}{For \code{ppc_loo_intervals()} and \code{ppc_loo_ribbon()}, an optional integer vector indicating which observations in \code{y} (and \code{yrep}) to include. Dropping observations from \code{y} and \code{yrep} manually before passing @@ -178,14 +214,14 @@ data points and five columns in the following order: lower outer interval, lower inner interval, median (50\%), upper inner interval and upper outer interval (column names are ignored).} -\item{prob, prob_outer}{Values between \code{0} and \code{1} indicating the desired -probability mass to include in the inner and outer intervals. The defaults -are \code{prob=0.5} and \code{prob_outer=0.9}.} - \item{order}{For \code{ppc_loo_intervals()}, a string indicating how to arrange the plotted intervals. The default (\code{"index"}) is to plot them in the order of the observations. The alternative (\code{"median"}) arranges them by median value from smallest (left) to largest (right).} + +\item{prob, prob_outer}{Values between \code{0} and \code{1} indicating the desired +probability mass to include in the inner and outer intervals. The defaults +are \code{prob=0.5} and \code{prob_outer=0.9}.} } \value{ A ggplot object that can be further customized using the \strong{ggplot2} package. @@ -232,7 +268,6 @@ the LOO predictive distribution. } \examples{ - \dontrun{ library(rstanarm) library(loo) @@ -240,12 +275,12 @@ library(loo) head(radon) fit <- stan_lmer( log_radon ~ floor + log_uranium + floor:log_uranium - + (1 + floor | county), + + (1 + floor | county), data = radon, iter = 100, chains = 2, cores = 2 - ) +) y <- radon$log_radon yrep <- posterior_predict(fit) @@ -260,6 +295,12 @@ ppc_loo_pit_overlay(y, yrep, lw = lw) ppc_loo_pit_qq(y, yrep, lw = lw) ppc_loo_pit_qq(y, yrep, lw = lw, compare = "normal") +# predictive calibration check using LOO probability integral transform +ppc_loo_pit_ecdf(y, yrep, lw) + +# With `plot_diff = TRUE` it is easier to assess the calibration. +ppc_loo_pit_ecdf(y, yrep, lw, plot_diff = TRUE) + # can use the psis object instead of lw ppc_loo_pit_qq(y, yrep, psis_object = psis1) @@ -268,8 +309,10 @@ keep_obs <- 1:50 ppc_loo_intervals(y, yrep, psis_object = psis1, subset = keep_obs) color_scheme_set("gray") -ppc_loo_intervals(y, yrep, psis_object = psis1, subset = keep_obs, - order = "median") +ppc_loo_intervals(y, yrep, + psis_object = psis1, subset = keep_obs, + order = "median" +) } } From 3ba857138110df1ab2bb5321d03135cf5832d954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Wed, 23 Apr 2025 11:13:55 +0300 Subject: [PATCH 2/6] test ppc_loo_pit_ecdf() --- .../ppc-loo/ppc-loo-pit-ecdf-default.svg | 59 ++++++++++++++++++ .../ppc-loo-pit-ecdf-ecdf-difference.svg | 55 ++++++++++++++++ .../ppc-loo/ppc-loo-pit-ecdf-eval-points.svg | 59 ++++++++++++++++++ .../_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg | 59 ++++++++++++++++++ tests/testthat/test-ppc-loo.R | 62 ++++++++++++++++++- 5 files changed, 293 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg create mode 100644 tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg create mode 100644 tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-eval-points.svg create mode 100644 tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg new file mode 100644 index 00000000..39aa3153 --- /dev/null +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +LOO PIT +ECDF +ppc_loo_pit_ecdf (default) + + diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg new file mode 100644 index 00000000..cbc4d379 --- /dev/null +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg @@ -0,0 +1,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +-0.05 +0.00 +0.05 + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +LOO PIT +ECDF difference +ppc_loo_pit_ecdf (ecdf difference) + + diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-eval-points.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-eval-points.svg new file mode 100644 index 00000000..631887b5 --- /dev/null +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-eval-points.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +LOO PIT +ECDF +ppc_loo_pit_ecdf (eval_points) + + diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg new file mode 100644 index 00000000..c9390e00 --- /dev/null +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +LOO PIT +ECDF +ppc_loo_pit_ecdf (prob) + + diff --git a/tests/testthat/test-ppc-loo.R b/tests/testthat/test-ppc-loo.R index 47afe69d..2ade9806 100644 --- a/tests/testthat/test-ppc-loo.R +++ b/tests/testthat/test-ppc-loo.R @@ -73,10 +73,25 @@ test_that("ppc_loo_pit_qq returns ggplot object", { expect_equal(p3$labels$x, "Normal") }) -test_that("ppc_loo_pit functions work when pit specified instead of y,yrep,lw", { +test_that("ppc_loo_pit_ecdf returns a ggplot object", { + skip_if_not_installed("rstanarm") + skip_if_not_installed("loo") + expect_gg(p0 <- ppc_loo_pit_ecdf(y, yrep, lw, interval_method = "optimize", + eval_points = 100)) + expect_gg(p1 <- ppc_loo_pit_ecdf(y, yrep, lw, interval_method = "optimize")) + expect_gg(p2 <- ppc_loo_pit_ecdf(y, yrep, psis_object = psis1, interval_method = "optimize")) + expect_equal(p1$labels$x, "LOO PIT") + expect_equal(p1$labels$y, "ECDF") + expect_equal(p1$data, p2$data) + expect_gg(p3 <- ppc_loo_pit_ecdf(y, yrep, lw, plot_diff = TRUE, interval_method = "optimize")) + expect_equal(p3$labels$y, "ECDF difference") +}) + +test_that("ppc_loo_pit functions work when pit specified instead of y, yrep, and lw", { skip_if_not_installed("rstanarm") skip_if_not_installed("loo") expect_gg(ppc_loo_pit_qq(pit = pits)) + # ppc_loo_pit_qq expect_message( p1 <- ppc_loo_pit_qq(y = y, yrep = yrep, lw = lw, pit = pits), "'pit' specified so ignoring 'y','yrep','lw' if specified" @@ -86,7 +101,18 @@ test_that("ppc_loo_pit functions work when pit specified instead of y,yrep,lw", ) expect_equal(p1$data, p2$data) + # ppc_loo_pit_ecdf + expect_gg(ppc_loo_pit_ecdf(pit = rep(pits, 4))) + expect_message( + p1 <- ppc_loo_pit_ecdf(y = y, yrep = yrep, lw = lw, pit = rep(pits, 4)), + "'pit' specified so ignoring 'y','yrep','lw' if specified" + ) + expect_message( + p2 <- ppc_loo_pit_ecdf(pit = rep(pits, 4)) + ) + expect_equal(p1$data, p2$data) + # ppc_loo_pit_overlay expect_gg(p1 <- ppc_loo_pit_overlay(pit = pits)) expect_message( ppc_loo_pit_overlay(y = y, yrep = yrep, lw = lw, pit = pits), @@ -255,3 +281,37 @@ test_that("ppc_loo_ribbon renders correctly", { vdiffr::expect_doppelganger("ppc_loo_ribbon (subset)", p_custom) }) +test_that("ppc_loo_pit_ecdf renders correctly", { + skip_on_cran() + skip_if_not_installed("vdiffr") + skip_if_not_installed("loo") + skip_on_r_oldrel() + pit <- pmin(1, rstantools::loo_pit( + example_yrep_draws(), + example_y_data(), + matrix(log(1 / nrow(example_yrep_draws())), ncol = ncol(example_yrep_draws()), + nrow = nrow(example_yrep_draws())) + )) + + p_base <- ppc_loo_pit_ecdf(pit = pit) + vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (default)", p_base) + + p_custom <- ppc_loo_pit_ecdf( + pit = pit, + eval_points = 200, + ) + vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (eval_points)", p_custom) + + p_custom <- ppc_loo_pit_ecdf( + pit = pit, + prob = 0.95, + eval_points = 500 + ) + vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (prob)", p_custom) + + p_custom <- ppc_loo_pit_ecdf( + pit = pit, + plot_diff = TRUE + ) + vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (ecdf difference)", p_custom) +}) From bd794693de1d644a3e470bc7f006f9fac91574e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= <33978952+TeemuSailynoja@users.noreply.github.com> Date: Wed, 23 Apr 2025 11:17:13 +0300 Subject: [PATCH 3/6] fix typo --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 8a9c606e..83d34833 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # bayesplot (development version) -* Added `ppc_loo_pit_ecdf()` bu @TeemuSäilynoja +* Added `ppc_loo_pit_ecdf()` by @TeemuSailynoja # bayesplot 1.12.0 From ea71b9d367ad9e911c4b03cd5b1233f3b05e3932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 24 Apr 2025 18:55:21 +0300 Subject: [PATCH 4/6] Align arguments of *_pit_ecdf() functions for now. --- R/ppc-loo.R | 67 ++++++++++--------- man/PPC-loo.Rd | 36 +++++----- .../ppc-loo/ppc-loo-pit-ecdf-default.svg | 6 +- .../ppc-loo-pit-ecdf-ecdf-difference.svg | 52 +++++++------- .../ppc-loo/ppc-loo-pit-ecdf-eval-points.svg | 59 ---------------- .../_snaps/ppc-loo/ppc-loo-pit-ecdf-k.svg | 59 ++++++++++++++++ .../_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg | 6 +- tests/testthat/test-ppc-loo.R | 44 ++++++------ 8 files changed, 167 insertions(+), 162 deletions(-) delete mode 100644 tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-eval-points.svg create mode 100644 tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-k.svg diff --git a/R/ppc-loo.R b/R/ppc-loo.R index bcec4775..e1d6ec70 100644 --- a/R/ppc-loo.R +++ b/R/ppc-loo.R @@ -374,42 +374,40 @@ ppc_loo_pit_qq <- function(y, #' @rdname PPC-loo #' @export -#' @param eval_points For `ppc_loo_pit_ecdf()`, an optional integer defining -#' the number of equally spaced evaluation points for the ECDF. Reducing -#' `eval_poins` when using `interpolate_adj = FALSE` makes computing the -#' confidence bands faster. If `pit` is supplied, defaults to `length(pit)`, -#' otherwise `min(nrow(yrep) + 1, -#' @param prob For `ppc_loo_pit_ecdf()`, the desired simultaneous coverage -#' level of the bands around the ECDF. A value in (0,1). +#' @param K For `ppc_loo_pit_ecdf()` an optional integer defining the number +#' of equally spaced evaluation points for the PIT-ECDF. Reducing K when +#' using `interpolate_adj = FALSE` makes computing the confidence bands +#' faster. If `pit` is supplied, defaults to `length(pit)`, otherwise +#' `yrep` determines the maximum accuracy of the estimated PIT values and +#' `K` is set to `min(nrow(yrep) + 1, 1000)`. #' @param plot_diff For `ppc_loo_pit_ecdf()`, a boolean defining whether to -#' plot the difference between the observed PIT-ECDF and the theoretical -#' expectation for uniform PIT values rather than plotting the regular ECDF. -#' The default is `FALSE`, but for large samples we recommend setting -#' `plot_diff = TRUE` to better use the plot area. -#' @param interval_method For `ppc_loo_pit_ecdf()`, a string that can be either -#' "interpolate" or "optimize". If "simulate" (the default), the simultaneous -#' confidence bands are interpolated based on precomputed values rather than -#' solved for the specific combination of `eval_points` and `dim(yrep)`. -#' The default is to use interpolation if `eval_points` is greater than 200. -#' +#' plot the difference between the observed PIT-ECDF and the theoretical +#' expectation for uniform PIT values rather than plotting the regular ECDF. +#' The default is `FALSE`, but for large samples we recommend setting +#' `plot_diff = TRUE` to better use the plot area. +#' @param interpolate_adj For `ppc_loo_pit_ecdf()`, a boolean defining if the +#' simultaneous confidence bands should be interpolated based on precomputed +#' values rather than computed exactly. Computing the bands may be +#' computationally intensive and the approximation gives a fast method for +#' assessing the ECDF trajectory. The default is to use interpolation if `K` +#' is greater than 200. ppc_loo_pit_ecdf <- function(y, yrep, lw = NULL, ..., psis_object = NULL, pit = NULL, - eval_points = NULL, + K = NULL, prob = .99, plot_diff = FALSE, - interval_method = c("interpolate", "optimize")) { + interpolate_adj = NULL) { check_ignored_arguments(...) - interval_method <- match.arg(interval_method) if (!is.null(pit)) { inform("'pit' specified so ignoring 'y','yrep','lw' if specified.") pit <- validate_pit(pit) - if (is.null(eval_points)) { - eval_points <- length(pit) + if (is.null(K)) { + K <- length(pit) } } else { suggested_package("rstantools") @@ -418,31 +416,31 @@ ppc_loo_pit_ecdf <- function(y, lw <- .get_lw(lw, psis_object) stopifnot(identical(dim(yrep), dim(lw))) pit <- pmin(1, rstantools::loo_pit(object = yrep, y = y, lw = lw)) - if (is.null(eval_points)) { - eval_points <- min(nrow(yrep) + 1, 1000) + if (is.null(K)) { + K <- min(nrow(yrep) + 1, 1000) } } n_obs <- length(pit) gamma <- adjust_gamma( N = n_obs, - K = eval_points, + K = K, prob = prob, - interpolate_adj = interval_method == "interpolate" + interpolate_adj = interpolate_adj ) - lims <- ecdf_intervals(gamma = gamma, N = n_obs, K = eval_points) + lims <- ecdf_intervals(gamma = gamma, N = n_obs, K = K) ggplot() + aes( - x = seq(0, 1, length.out = eval_points), - y = ecdf(pit)(seq(0, 1, length.out = eval_points)) - - (plot_diff == TRUE) * seq(0, 1, length.out = eval_points), + x = seq(0, 1, length.out = K), + y = ecdf(pit)(seq(0, 1, length.out = K)) - + (plot_diff == TRUE) * seq(0, 1, length.out = K), color = "y" ) + geom_step(show.legend = FALSE) + geom_step( aes( y = lims$upper[-1] / n_obs - - (plot_diff == TRUE) * seq(0, 1, length.out = eval_points), + (plot_diff == TRUE) * seq(0, 1, length.out = K), color = "yrep" ), linetype = 2, show.legend = FALSE @@ -450,7 +448,7 @@ ppc_loo_pit_ecdf <- function(y, geom_step( aes( y = lims$lower[-1] / n_obs - - (plot_diff == TRUE) * seq(0, 1, length.out = eval_points), + (plot_diff == TRUE) * seq(0, 1, length.out = K), color = "yrep" ), linetype = 2, show.legend = FALSE @@ -489,7 +487,6 @@ ppc_loo_pit <- #' @rdname PPC-loo #' @export -#' @template args-prob-prob_outer #' @param intervals For `ppc_loo_intervals()` and `ppc_loo_ribbon()`, optionally #' a matrix of pre-computed LOO predictive intervals that can be specified #' instead of `yrep` (ignored if `intervals` is specified). If not specified @@ -502,6 +499,10 @@ ppc_loo_pit <- #' the plotted intervals. The default (`"index"`) is to plot them in the #' order of the observations. The alternative (`"median"`) arranges them #' by median value from smallest (left) to largest (right). +#' @param prob,prob_outer Values between `0` and `1` indicating the desired +#' probability mass to include in the inner and outer intervals. The defaults +#' are `prob=0.5` and `prob_outer=0.9` for `ppc_loo_intervals()` and +#' `prob = 0.99` for `ppc_loo_pit_ecdf()`. #' @param subset For `ppc_loo_intervals()` and `ppc_loo_ribbon()`, an optional #' integer vector indicating which observations in `y` (and `yrep`) to #' include. Dropping observations from `y` and `yrep` manually before passing diff --git a/man/PPC-loo.Rd b/man/PPC-loo.Rd index 81bb4ec7..42ed4821 100644 --- a/man/PPC-loo.Rd +++ b/man/PPC-loo.Rd @@ -62,10 +62,10 @@ ppc_loo_pit_ecdf( ..., psis_object = NULL, pit = NULL, - eval_points = NULL, + K = NULL, prob = 0.99, plot_diff = FALSE, - interval_method = c("interpolate", "optimize") + interpolate_adj = NULL ) ppc_loo_pit( @@ -175,14 +175,17 @@ compares computed PIT values to the standard uniform distribution. If calculated from the PIT values to the theoretical standard normal quantiles.} -\item{eval_points}{For \code{ppc_loo_pit_ecdf()}, an optional integer defining -the number of equally spaced evaluation points for the ECDF. Reducing -\code{eval_poins} when using \code{interpolate_adj = FALSE} makes computing the -confidence bands faster. If \code{pit} is supplied, defaults to \code{length(pit)}, -otherwise `min(nrow(yrep) + 1,} +\item{K}{For \code{ppc_loo_pit_ecdf()} an optional integer defining the number +of equally spaced evaluation points for the PIT-ECDF. Reducing K when +using \code{interpolate_adj = FALSE} makes computing the confidence bands +faster. If \code{pit} is supplied, defaults to \code{length(pit)}, otherwise +\code{yrep} determines the maximum accuracy of the estimated PIT values and +\code{K} is set to \code{min(nrow(yrep) + 1, 1000)}.} -\item{prob}{For \code{ppc_loo_pit_ecdf()}, the desired simultaneous coverage -level of the bands around the ECDF. A value in (0,1).} +\item{prob, prob_outer}{Values between \code{0} and \code{1} indicating the desired +probability mass to include in the inner and outer intervals. The defaults +are \code{prob=0.5} and \code{prob_outer=0.9} for \code{ppc_loo_intervals()} and +\code{prob = 0.99} for \code{ppc_loo_pit_ecdf()}.} \item{plot_diff}{For \code{ppc_loo_pit_ecdf()}, a boolean defining whether to plot the difference between the observed PIT-ECDF and the theoretical @@ -190,11 +193,12 @@ expectation for uniform PIT values rather than plotting the regular ECDF. The default is \code{FALSE}, but for large samples we recommend setting \code{plot_diff = TRUE} to better use the plot area.} -\item{interval_method}{For \code{ppc_loo_pit_ecdf()}, a string that can be either -"interpolate" or "optimize". If "simulate" (the default), the simultaneous -confidence bands are interpolated based on precomputed values rather than -solved for the specific combination of \code{eval_points} and \code{dim(yrep)}. -The default is to use interpolation if \code{eval_points} is greater than 200.} +\item{interpolate_adj}{For \code{ppc_loo_pit_ecdf()}, a boolean defining if the +simultaneous confidence bands should be interpolated based on precomputed +values rather than computed exactly. Computing the bands may be +computationally intensive and the approximation gives a fast method for +assessing the ECDF trajectory. The default is to use interpolation if \code{K} +is greater than 200.} \item{subset}{For \code{ppc_loo_intervals()} and \code{ppc_loo_ribbon()}, an optional integer vector indicating which observations in \code{y} (and \code{yrep}) to @@ -218,10 +222,6 @@ interval (column names are ignored).} the plotted intervals. The default (\code{"index"}) is to plot them in the order of the observations. The alternative (\code{"median"}) arranges them by median value from smallest (left) to largest (right).} - -\item{prob, prob_outer}{Values between \code{0} and \code{1} indicating the desired -probability mass to include in the inner and outer intervals. The defaults -are \code{prob=0.5} and \code{prob_outer=0.9}.} } \value{ A ggplot object that can be further customized using the \strong{ggplot2} package. diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg index 39aa3153..87a4c0b9 100644 --- a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-default.svg @@ -25,9 +25,9 @@ - - - + + + diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg index cbc4d379..ac754257 100644 --- a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-ecdf-difference.svg @@ -20,36 +20,36 @@ - - + + - - - - + + + + - --0.05 -0.00 -0.05 - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -LOO PIT + +-0.1 +0.0 +0.1 + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +LOO PIT ECDF difference -ppc_loo_pit_ecdf (ecdf difference) +ppc_loo_pit_ecdf (ecdf difference) diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-eval-points.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-eval-points.svg deleted file mode 100644 index 631887b5..00000000 --- a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-eval-points.svg +++ /dev/null @@ -1,59 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -LOO PIT -ECDF -ppc_loo_pit_ecdf (eval_points) - - diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-k.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-k.svg new file mode 100644 index 00000000..d1921d75 --- /dev/null +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-k.svg @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +LOO PIT +ECDF +ppc_loo_pit_ecdf (K) + + diff --git a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg index c9390e00..f65e2ae1 100644 --- a/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg +++ b/tests/testthat/_snaps/ppc-loo/ppc-loo-pit-ecdf-prob.svg @@ -25,9 +25,9 @@ - - - + + + diff --git a/tests/testthat/test-ppc-loo.R b/tests/testthat/test-ppc-loo.R index 2ade9806..d3d2d376 100644 --- a/tests/testthat/test-ppc-loo.R +++ b/tests/testthat/test-ppc-loo.R @@ -76,14 +76,13 @@ test_that("ppc_loo_pit_qq returns ggplot object", { test_that("ppc_loo_pit_ecdf returns a ggplot object", { skip_if_not_installed("rstanarm") skip_if_not_installed("loo") - expect_gg(p0 <- ppc_loo_pit_ecdf(y, yrep, lw, interval_method = "optimize", - eval_points = 100)) - expect_gg(p1 <- ppc_loo_pit_ecdf(y, yrep, lw, interval_method = "optimize")) - expect_gg(p2 <- ppc_loo_pit_ecdf(y, yrep, psis_object = psis1, interval_method = "optimize")) + expect_gg(p0 <- ppc_loo_pit_ecdf(y, yrep, lw)) + expect_gg(p1 <- ppc_loo_pit_ecdf(y, yrep, lw)) + expect_gg(p2 <- ppc_loo_pit_ecdf(y, yrep, psis_object = psis1)) expect_equal(p1$labels$x, "LOO PIT") expect_equal(p1$labels$y, "ECDF") expect_equal(p1$data, p2$data) - expect_gg(p3 <- ppc_loo_pit_ecdf(y, yrep, lw, plot_diff = TRUE, interval_method = "optimize")) + expect_gg(p3 <- ppc_loo_pit_ecdf(y, yrep, lw, plot_diff = TRUE)) expect_equal(p3$labels$y, "ECDF difference") }) @@ -286,32 +285,37 @@ test_that("ppc_loo_pit_ecdf renders correctly", { skip_if_not_installed("vdiffr") skip_if_not_installed("loo") skip_on_r_oldrel() - pit <- pmin(1, rstantools::loo_pit( - example_yrep_draws(), - example_y_data(), - matrix(log(1 / nrow(example_yrep_draws())), ncol = ncol(example_yrep_draws()), - nrow = nrow(example_yrep_draws())) - )) - p_base <- ppc_loo_pit_ecdf(pit = pit) + psis_object <- suppressWarnings(loo::psis(-vdiff_loo_lw)) + p_base <- ppc_loo_pit_ecdf( + vdiff_loo_y, + vdiff_loo_yrep, + psis_object = psis_object + ) vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (default)", p_base) p_custom <- ppc_loo_pit_ecdf( - pit = pit, - eval_points = 200, + vdiff_loo_y, + vdiff_loo_yrep, + psis_object = psis_object, + K = 50 ) - vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (eval_points)", p_custom) + vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (K)", p_custom) p_custom <- ppc_loo_pit_ecdf( - pit = pit, - prob = 0.95, - eval_points = 500 + vdiff_loo_y, + vdiff_loo_yrep, + psis_object = psis_object, + prob = 0.95 ) vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (prob)", p_custom) p_custom <- ppc_loo_pit_ecdf( - pit = pit, - plot_diff = TRUE + vdiff_loo_y, + vdiff_loo_yrep, + psis_object = psis_object, + plot_diff = TRUE, + K = 100 ) vdiffr::expect_doppelganger("ppc_loo_pit_ecdf (ecdf difference)", p_custom) }) From 4d387edf8d7edc07ce9770952428145ac4bb66e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Mon, 28 Apr 2025 11:51:22 +0300 Subject: [PATCH 5/6] Add description of ppc_loo_pit_ecdf --- R/ppc-loo.R | 10 +++++++++- man/PPC-loo.Rd | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/R/ppc-loo.R b/R/ppc-loo.R index e1d6ec70..681d8fc4 100644 --- a/R/ppc-loo.R +++ b/R/ppc-loo.R @@ -27,7 +27,7 @@ #' #' @section Plot Descriptions: #' \describe{ -#' \item{`ppc_loo_pit_overlay()`, `ppc_loo_pit_qq()`}{ +#' \item{`ppc_loo_pit_overlay()`, `ppc_loo_pit_qq()`, `ppc_loo_pit_ecdf()`}{ #' The calibration of marginal predictions can be assessed using probability #' integral transformation (PIT) checks. LOO improves the check by avoiding the #' double use of data. See the section on marginal predictive checks in Gelman @@ -52,6 +52,14 @@ #' we have found that the overlaid density plot (`ppc_loo_pit_overlay()`) #' function will provide a clearer picture of calibration problems than the #' Q-Q plot. +#' +#' The `ppc_loo_pit_ecdf()` function visualizes the empirical cumulative +#' distribution function (ECDF) of the LOO PITs overlaid with simultaneous +#' confidence intervals for a standard uniform sample. For large samples, +#' these confidence intervals are visually very narrow. Setting the +#' `plot_diff` argument to `TRUE` transforms the plot to display the +#' difference of the ECDF and the theoretical expectation, which can aid in +#' the visual assessment of calibration. #' } #' \item{`ppc_loo_intervals()`, `ppc_loo_ribbon()`}{ #' Similar to [ppc_intervals()] and [ppc_ribbon()] but the intervals are for diff --git a/man/PPC-loo.Rd b/man/PPC-loo.Rd index 42ed4821..10f29d8c 100644 --- a/man/PPC-loo.Rd +++ b/man/PPC-loo.Rd @@ -234,7 +234,7 @@ for details. \section{Plot Descriptions}{ \describe{ -\item{\code{ppc_loo_pit_overlay()}, \code{ppc_loo_pit_qq()}}{ +\item{\code{ppc_loo_pit_overlay()}, \code{ppc_loo_pit_qq()}, \code{ppc_loo_pit_ecdf()}}{ The calibration of marginal predictions can be assessed using probability integral transformation (PIT) checks. LOO improves the check by avoiding the double use of data. See the section on marginal predictive checks in Gelman @@ -259,6 +259,14 @@ the (mis)calibration better for the extreme values. However, in most cases we have found that the overlaid density plot (\code{ppc_loo_pit_overlay()}) function will provide a clearer picture of calibration problems than the Q-Q plot. + +The \code{ppc_loo_pit_ecdf()} function visualizes the empirical cumulative +distribution function (ECDF) of the LOO PITs overlaid with simultaneous +confidence intervals for a standard uniform sample. For large samples, +these confidence intervals are visually very narrow. Setting the +\code{plot_diff} argument to \code{TRUE} transforms the plot to display the +difference of the ECDF and the theoretical expectation, which can aid in +the visual assessment of calibration. } \item{\code{ppc_loo_intervals()}, \code{ppc_loo_ribbon()}}{ Similar to \code{\link[=ppc_intervals]{ppc_intervals()}} and \code{\link[=ppc_ribbon]{ppc_ribbon()}} but the intervals are for From f8fab2f5cb769557d33b1388ac7571a49430f12c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Mon, 28 Apr 2025 11:51:29 +0300 Subject: [PATCH 6/6] Forward compatibility with ggplot2 --- tests/testthat/test-ppc-loo.R | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-ppc-loo.R b/tests/testthat/test-ppc-loo.R index d3d2d376..d6424cf2 100644 --- a/tests/testthat/test-ppc-loo.R +++ b/tests/testthat/test-ppc-loo.R @@ -67,10 +67,20 @@ test_that("ppc_loo_pit_qq returns ggplot object", { skip_if_not_installed("loo") expect_gg(p1 <- ppc_loo_pit_qq(y, yrep, lw)) expect_gg(p2 <- ppc_loo_pit_qq(y, yrep, psis_object = psis1)) - expect_equal(p1$labels$x, "Uniform") + if ("get_labs" %in% getNamespaceExports("ggplot2")) { + ll1 <- ggplot2::get_labs(p1) + } else { + ll1 <- p1$labels + } + expect_equal(ll1$x, "Uniform") expect_equal(p1$data, p2$data) expect_gg(p3 <- ppc_loo_pit_qq(y, yrep, lw, compare = "normal")) - expect_equal(p3$labels$x, "Normal") + if ("get_labs" %in% getNamespaceExports("ggplot2")) { + ll3 <- ggplot2::get_labs(p3) + } else { + ll3 <- p3$labels + } + expect_equal(ll3$x, "Normal") }) test_that("ppc_loo_pit_ecdf returns a ggplot object", { @@ -79,11 +89,21 @@ test_that("ppc_loo_pit_ecdf returns a ggplot object", { expect_gg(p0 <- ppc_loo_pit_ecdf(y, yrep, lw)) expect_gg(p1 <- ppc_loo_pit_ecdf(y, yrep, lw)) expect_gg(p2 <- ppc_loo_pit_ecdf(y, yrep, psis_object = psis1)) - expect_equal(p1$labels$x, "LOO PIT") - expect_equal(p1$labels$y, "ECDF") + if ("get_labs" %in% getNamespaceExports("ggplot2")) { + ll1 <- ggplot2::get_labs(p1) + } else { + ll1 <- p1$labels + } + expect_equal(ll1$x, "LOO PIT") + expect_equal(ll1$y, "ECDF") expect_equal(p1$data, p2$data) expect_gg(p3 <- ppc_loo_pit_ecdf(y, yrep, lw, plot_diff = TRUE)) - expect_equal(p3$labels$y, "ECDF difference") + if ("get_labs" %in% getNamespaceExports("ggplot2")) { + ll3 <- ggplot2::get_labs(p3) + } else { + ll3 <- p3$labels + } + expect_equal(ll3$y, "ECDF difference") }) test_that("ppc_loo_pit functions work when pit specified instead of y, yrep, and lw", {