diff --git a/NAMESPACE b/NAMESPACE index 4823d8df..1752f215 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -119,6 +119,10 @@ export(ppc_bars) export(ppc_bars_data) export(ppc_bars_grouped) export(ppc_boxplot) +export(ppc_calibration) +export(ppc_calibration_grouped) +export(ppc_calibration_overlay) +export(ppc_calibration_overlay_grouped) export(ppc_data) export(ppc_dens) export(ppc_dens_overlay) @@ -142,6 +146,8 @@ export(ppc_intervals_data) export(ppc_intervals_grouped) export(ppc_km_overlay) export(ppc_km_overlay_grouped) +export(ppc_loo_calibration) +export(ppc_loo_calibration_grouped) export(ppc_loo_intervals) export(ppc_loo_pit) export(ppc_loo_pit_data) diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R new file mode 100644 index 00000000..c26d1202 --- /dev/null +++ b/R/ppc-calibration.R @@ -0,0 +1,224 @@ +# x' PPC calibration +#' +#' Assess the calibration of the predictive distributions `yrep` in relation to +#' the data `y'. +#' See the **Plot Descriptions** section, below, for details. +#' +#' @name PPC-calibration +#' @family PPCs +#' +#' @template args-y-yrep +#' @template args-group +#' +#' @template return-ggplot-or-data +#' +#' @section Plot Descriptions: +#' \describe{ +#' \item{`ppc_calibration()`,`ppc_calibration_grouped()`}{ +#' +#' }, +#' \item{`ppc_calibration_overlay()`,`ppc_calibration_overlay_grouped()`}{ +#' +#' }, +#' \item{`ppc_loo_calibration()`,`ppc_loo_calibration_grouped()`}{ +#' +#' } +#' } +#' +#' @examples +#' color_scheme_set("brightblue") +#' +#' # Make an example dataset of binary observations +#' ymin <- range(example_y_data(), example_yrep_draws())[1] +#' ymax <- range(example_y_data(), example_yrep_draws())[2] +#' y <- rbinom(length(example_y_data()), 1, (example_y_data() - ymin) / (ymax - ymin)) +#' prep <- (example_yrep_draws() - ymin) / (ymax - ymin) +#' +#' ppc_calibration_overlay(y, prep[1:50, ]) +NULL + + +#' @rdname PPC-calibration +#' @export +ppc_calibration_overlay <- function( + y, prep, ..., linewidth = 0.25, alpha = 0.5) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line( + aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_calibration_overlay_grouped <- function( + y, prep, group, ..., linewidth = 0.25, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep, group) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line(aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + facet_wrap(vars(group)) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_calibration <- function( + y, prep, prob = .95, show_mean = TRUE, ..., linewidth = 0.5, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep) %>% + group_by(y_id) %>% + summarise( + value = median(value), + lb = quantile(cep, .5 - .5 * prob), + ub = quantile(cep, .5 + .5 * prob), + cep = median(cep) + ) + + ggplot(data) + + aes(value, cep) + + geom_abline(color = "black", linetype = 2) + + geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) + + geom_line( + aes(color = "y"), + linewidth = linewidth + ) + + scale_color_ppc() + + scale_fill_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_calibration_grouped <- function( + y, prep, group, prob = .95, show_mean = TRUE, ..., linewidth = 0.5, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep, group) %>% + group_by(group, y_id) %>% + summarise( + value = median(value), + lb = quantile(cep, .5 - .5 * prob), + ub = quantile(cep, .5 + .5 * prob), + cep = median(cep) + ) + + ggplot(data) + + aes(value, cep) + + geom_abline(color = "black", linetype = 2) + + geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) + + geom_line( + aes(color = "y"), + linewidth = linewidth + ) + + facet_wrap(vars(group)) + + scale_color_ppc() + + scale_fill_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_loo_calibration <- function( + y, prep, lw, ..., linewidth = 0.25, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line( + aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_loo_calibration_grouped <- function( + y, prep, group, lw, ..., linewidth = 0.25, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep, group) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line(aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + facet_wrap(vars(group)) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +.ppc_calibration_data <- function(y, prep, group = NULL) { + y <- validate_y(y) + n_obs <- length(y) + prep <- validate_predictions(prep, n_obs) + if (any(prep > 1 | prep < 0)) { + stop("Values of ´prep´ should be predictive probabilities between 0 and 1.") + } + if (!is.null(group)) { + group <- validate_group(group, n_obs) + } else { + group <- rep(1, n_obs * nrow(prep)) + } + + if (requireNamespace("monotone", quietly = TRUE)) { + monotone <- monotone::monotone + } else { + monotone <- function(y) { + stats::isoreg(y)$yf + } + } + .ppd_data(prep, group = group) %>% + group_by(group, rep_id) %>% + mutate( + ord = order(value), + value = value[ord], + cep = monotone(y[ord]) + ) |> + ungroup() +} + +.loo_resample_data <- function(prep, lw, psis_object) { + lw <- .get_lw(lw, psis_object) + stopifnot(identical(dim(prep), dim(lw))) + # Work in progress here... +} diff --git a/man/PPC-calibration.Rd b/man/PPC-calibration.Rd new file mode 100644 index 00000000..fbd9a0c3 --- /dev/null +++ b/man/PPC-calibration.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ppc-calibration.R +\name{PPC-calibration} +\alias{PPC-calibration} +\alias{ppc_calibration_overlay} +\alias{ppc_calibration_overlay_grouped} +\alias{ppc_calibration} +\alias{ppc_calibration_grouped} +\alias{ppc_loo_calibration} +\alias{ppc_loo_calibration_grouped} +\title{Assess the calibration of the predictive distributions \code{yrep} in relation to +the data `y'. +See the \strong{Plot Descriptions} section, below, for details.} +\usage{ +ppc_calibration_overlay(y, prep, ..., linewidth = 0.25, alpha = 0.5) + +ppc_calibration_overlay_grouped( + y, + prep, + group, + ..., + linewidth = 0.25, + alpha = 0.7 +) + +ppc_calibration(y, prep, prob = 0.95, ..., linewidth = 0.5, alpha = 0.7) + +ppc_calibration_grouped( + y, + prep, + group, + prob = 0.95, + ..., + linewidth = 0.5, + alpha = 0.7 +) + +ppc_loo_calibration(y, yrep, lw, ..., linewidth = 0.25, alpha = 0.7) + +ppc_loo_calibration_grouped( + y, + yrep, + group, + lw, + ..., + linewidth = 0.25, + alpha = 0.7 +) +} +\arguments{ +\item{y}{A vector of observations. See \strong{Details}.} + +\item{group}{A grouping variable of the same length as \code{y}. +Will be coerced to \link[base:factor]{factor} if not already a factor. +Each value in \code{group} is interpreted as the group level pertaining +to the corresponding observation.} + +\item{yrep}{An \code{S} by \code{N} matrix of draws from the posterior (or prior) +predictive distribution. The number of rows, \code{S}, is the size of the +posterior (or prior) sample used to generate \code{yrep}. The number of columns, +\code{N} is the number of predicted observations (\code{length(y)}). The columns of +\code{yrep} should be in the same order as the data points in \code{y} for the plots +to make sense. See the \strong{Details} and \strong{Plot Descriptions} sections for +additional advice specific to particular plots.} +} +\value{ +The plotting functions return a ggplot object that can be further +customized using the \strong{ggplot2} package. The functions with suffix +\verb{_data()} return the data that would have been drawn by the plotting +function. +} +\description{ +Assess the calibration of the predictive distributions \code{yrep} in relation to +the data `y'. +See the \strong{Plot Descriptions} section, below, for details. +} +\section{Plot Descriptions}{ + +\describe{ +\item{\code{ppc_calibration()},\code{ppc_calibration_grouped()}}{ + +}, +\item{\code{ppc_calibration_overlay()},\code{ppc_calibration_overlay_grouped()}}{ + +}, +\item{\code{ppc_loo_calibration()},\code{ppc_loo_calibration_grouped()}}{ + +} +} +} + +\examples{ +color_scheme_set("brightblue") + +# Make an example dataset of binary observations +ymin <- range(example_y_data(), example_yrep_draws())[1] +ymax <- range(example_y_data(), example_yrep_draws())[2] +y <- rbinom(length(example_y_data()), 1, (example_y_data() - ymin) / (ymax - ymin)) +prep <- (example_yrep_draws() - ymin) / (ymax - ymin) + +ppc_calibration_overlay(y, prep[1:50, ]) +} +\seealso{ +Other PPCs: +\code{\link{PPC-censoring}}, +\code{\link{PPC-discrete}}, +\code{\link{PPC-distributions}}, +\code{\link{PPC-errors}}, +\code{\link{PPC-intervals}}, +\code{\link{PPC-loo}}, +\code{\link{PPC-overview}}, +\code{\link{PPC-scatterplots}}, +\code{\link{PPC-test-statistics}} +} +\concept{PPCs} diff --git a/man/PPC-censoring.Rd b/man/PPC-censoring.Rd index ddf011b4..30a7b78d 100644 --- a/man/PPC-censoring.Rd +++ b/man/PPC-censoring.Rd @@ -150,6 +150,7 @@ doi:10.1080/01621459.1958.10501452. } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, \code{\link{PPC-errors}}, diff --git a/man/PPC-discrete.Rd b/man/PPC-discrete.Rd index 434ba7bd..4f2fcabd 100644 --- a/man/PPC-discrete.Rd +++ b/man/PPC-discrete.Rd @@ -216,6 +216,7 @@ Visualizing count data regressions using rootograms. } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-distributions}}, \code{\link{PPC-errors}}, diff --git a/man/PPC-distributions.Rd b/man/PPC-distributions.Rd index 628a3ad5..991a9742 100644 --- a/man/PPC-distributions.Rd +++ b/man/PPC-distributions.Rd @@ -389,6 +389,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-errors}}, diff --git a/man/PPC-errors.Rd b/man/PPC-errors.Rd index 047590bf..c3196f26 100644 --- a/man/PPC-errors.Rd +++ b/man/PPC-errors.Rd @@ -227,6 +227,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/man/PPC-intervals.Rd b/man/PPC-intervals.Rd index ddac2ebc..369ab029 100644 --- a/man/PPC-intervals.Rd +++ b/man/PPC-intervals.Rd @@ -240,6 +240,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/man/PPC-loo.Rd b/man/PPC-loo.Rd index 10f29d8c..3113fab8 100644 --- a/man/PPC-loo.Rd +++ b/man/PPC-loo.Rd @@ -349,6 +349,7 @@ https://www.jstor.org/stable/2986005. } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/man/PPC-overview.Rd b/man/PPC-overview.Rd index 16d3f0ff..f58465d2 100644 --- a/man/PPC-overview.Rd +++ b/man/PPC-overview.Rd @@ -124,6 +124,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/man/PPC-scatterplots.Rd b/man/PPC-scatterplots.Rd index 2c22bbd3..2e274273 100644 --- a/man/PPC-scatterplots.Rd +++ b/man/PPC-scatterplots.Rd @@ -155,6 +155,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}}, diff --git a/man/PPC-test-statistics.Rd b/man/PPC-test-statistics.Rd index 83883979..c9368794 100644 --- a/man/PPC-test-statistics.Rd +++ b/man/PPC-test-statistics.Rd @@ -213,6 +213,7 @@ Press, London, third edition. (Ch. 6) } \seealso{ Other PPCs: +\code{\link{PPC-calibration}}, \code{\link{PPC-censoring}}, \code{\link{PPC-discrete}}, \code{\link{PPC-distributions}},