diff --git a/DESCRIPTION b/DESCRIPTION index 753e7dd49a..d85c7f6c98 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -247,6 +247,7 @@ Collate: 'stat-bindot.R' 'stat-binhex.R' 'stat-boxplot.R' + 'stat-chain.R' 'stat-contour.R' 'stat-count.R' 'stat-density-2d.R' diff --git a/NAMESPACE b/NAMESPACE index b58765ecc1..5d94ac4a5c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -257,6 +257,7 @@ export(StatBin2d) export(StatBindot) export(StatBinhex) export(StatBoxplot) +export(StatChain) export(StatContour) export(StatContourFilled) export(StatCount) @@ -498,6 +499,7 @@ export(layer_grob) export(layer_scales) export(layer_sf) export(lims) +export(link_stat) export(map_data) export(margin) export(margin_auto) @@ -684,6 +686,7 @@ export(stat_bin_2d) export(stat_bin_hex) export(stat_binhex) export(stat_boxplot) +export(stat_chain) export(stat_contour) export(stat_contour_filled) export(stat_count) diff --git a/R/stat-chain.R b/R/stat-chain.R new file mode 100644 index 0000000000..6ae1bfc35d --- /dev/null +++ b/R/stat-chain.R @@ -0,0 +1,179 @@ +#' Chain statistic computation +#' +#' This statistic layer can take multiple stats and chain these together +#' to transform the data in a series of computations. +#' +#' @inheritParams layer +#' @inheritParams geom_point +#' @param stats A character vector or list of statistical transformations to use +#' for this layer. Every element needs to be one of the following: +#' * A `Stat` ggproto subclass, for example `StatCount` +#' * A string naming the stat. To give the stat as a string, strip the +#' function name of the `stat_` prefix. For example, to use `stat_count()`, +#' give the stat as `"count"`. +#' * The result of [`link_stat()`] to pass parameters or mapping instructions. +#' +#' @seealso [link_stat()] +#' @details +#' The procedure in which stats are chained are as follows. First, the +#' layer-level, undelayed aesthetics in the `mapping` argument are evaluated. +#' The data that results from that evaluation is passed to the first stat in +#' the `stats` argument to perform that stat's computation. If that first stat +#' is a [`link_stat`] with an `after.stat` component, the `after.stat` component +#' is evaluated before passing on the data to the next stat in the `stats` +#' argument. The next components in the `stats` argument work the same: the +#' data is passed on to compute the stat, then `after.stat` is evaluated. In +#' essence, the `after.stat` allows control over how computed variables are +#' passed to the next stat in the chain. Finally, once all components in the +#' `stats` arguments have been handled, the staged after stat components of +#' the layer-level `mapping` is evaluated. Per usual, the data are then handled +#' by the position and geom parts of a layer. +#' +#' @export +#' +#' @examples +#' p <- ggplot(mpg, aes(displ, colour = drv)) +#' # Binning unique observations +#' p + stat_chain(stats = c("unique", "bin")) +#' # Controlling parameters +#' p + stat_chain( +#' stats = list("unique", link_stat("bin", bins = 10)) +#' ) +#' # Evaluate expressions after computing stats +#' p + stat_chain(stats = list( +#' link_stat("unique", after.stat = aes(x = x + 1)), +#' link_stat("density", after.stat = aes(y = density)) +#' )) +#' # Note that the last `after.stat` argument serves the same role as the +#' # `after_stat()` function in the layer mapping, so the following is +#' # equivalent to the previous plot +#' p + stat_chain( +#' mapping = aes(y = after_stat(density)), +#' stats = list(link_stat("unique", after.stat = aes(x = x + 1)), "density") +#' ) +stat_chain <- function( + mapping = NULL, + data = NULL, + geom = "path", + position = "identity", + ..., + stats = "identity", + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE) { + + layer( + data = data, + mapping = mapping, + stat = StatChain, + geom = geom, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list2( + na.rm = na.rm, + stats = stats, + ... + ) + ) +} + +#' Parameterise a statistic computation +#' +#' This is a helper function for [`stat_chain()`] to pass parameters and declare +#' mappings. +#' +#' @param stat The statistical transformation to use on the data. The `stat` +#' argument accepts the following: +#' * A `Stat` ggproto subclass, for example `StatCount`. +#' * A string naming the stat. To give the stat as a string, strip the +#' function name of the `stat_` prefix. For example, for `stat_count()`, give +#' the string `"count"`. +#' @param ... Other arguments passed to the stat as a parameter. +#' @param after.stat Set of aesthetic mappings created by [`aes()`] to be +#' evaluated only after the stat has been computed. +#' +#' @seealso [stat_chain()] +#' @returns A list bundling the stat, parameters and mapping. +#' @export +#' @keywords internal +#' +#' @examples +#' # See `?stat_chain` +link_stat <- function(stat, ..., after.stat = aes()) { + if (inherits(stat, "linked_stat")) { + return(stat) + } + + stat <- validate_subclass(stat, "Stat") + + params <- list2(...) + extra <- setdiff(names(params), stat$parameters(TRUE)) + if (length(extra) > 0) { + cli::cli_warn("Ignoring unknown parameters: {.arg {extra}}.") + params <- params[setdiff(names(params), extra)] + } + + structure( + list(stat = stat, params = params, after_stat = validate_mapping(after.stat)), + class = "linked_stat" + ) +} + +#' @rdname ggplot2-ggproto +#' @format NULL +#' @usage NULL +#' @export +StatChain <- ggproto( + "StatChain", Stat, + + extra_params = c("na.rm", "stats"), + + setup_params = function(data, params) { + if (inherits(params$stats, "linked_stat")) { + # When a single linked stat is passed outside a list, repair to list + # When using a single stat, using the appropriate `stat_*()` constructor + # is better, but we should consider programmatic use too. + params$stats <- list(params$stats) + } + + params$stats <- lapply(params$stats, link_stat) + params + }, + + compute_layer = function(self, data, params, layout) { + + for (i in seq_along(params$stats)) { + link <- params$stats[[i]] + stat <- link$stat + + # Repeat `Layer$compute_statistic()` duty + computed_param <- stat$setup_params(data, link$params) + computed_param$na.rm <- computed_param$na.rm %||% params$na.rm + data <- stat$setup_data(data, computed_param) + data <- stat$compute_layer(data, computed_param, layout) + if (nrow(data) < 1) { + return(data) + } + + # Repeat `Layer$map_statistic()` duty, skipping backtransforms and such + aes <- stat$default_aes[is_calculated_aes(stat$default_aes)] + # TODO: ideally we'd have access to Layer$computed_mapping to properly + # not touch user-specified mappings. + aes <- aes[setdiff(names(aes), names(data))] + aes <- compact(defaults(link$after_stat, aes)) + if (length(aes) == 0) { + next + } + new <- eval_aesthetics(substitute_aes(aes), data) + check_nondata_cols( + new, aes, + problem = "Aesthetics must be valid computed stats.", + hint = "Did you specify the `redirect` argument correctly?" + ) + data[names(new)] <- new + } + + data + } +) diff --git a/_pkgdown.yml b/_pkgdown.yml index 0259312234..2ed79eca41 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -69,6 +69,7 @@ reference: - stat_unique - stat_sf_coordinates - stat_manual + - stat_chain - after_stat - subtitle: Position adjustment diff --git a/man/ggplot2-ggproto.Rd b/man/ggplot2-ggproto.Rd index 6658fdafb9..8f7227f890 100644 --- a/man/ggplot2-ggproto.Rd +++ b/man/ggplot2-ggproto.Rd @@ -21,11 +21,11 @@ % R/position-stack.R, R/scale-.R, R/scale-binned.R, R/scale-continuous.R, % R/scale-date.R, R/scale-discrete-.R, R/scale-identity.R, R/stat-align.R, % R/stat-bin.R, R/stat-summary-2d.R, R/stat-bin2d.R, R/stat-bindot.R, -% R/stat-binhex.R, R/stat-boxplot.R, R/stat-contour.R, R/stat-count.R, -% R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, R/stat-ellipse.R, -% R/stat-function.R, R/stat-identity.R, R/stat-manual.R, R/stat-qq-line.R, -% R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, R/stat-sum.R, -% R/stat-summary-bin.R, R/stat-summary-hex.R, R/stat-summary.R, +% R/stat-binhex.R, R/stat-boxplot.R, R/stat-chain.R, R/stat-contour.R, +% R/stat-count.R, R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, +% R/stat-ellipse.R, R/stat-function.R, R/stat-identity.R, R/stat-manual.R, +% R/stat-qq-line.R, R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, +% R/stat-sum.R, R/stat-summary-bin.R, R/stat-summary-hex.R, R/stat-summary.R, % R/stat-unique.R, R/stat-ydensity.R \docType{data} \name{ggplot2-ggproto} @@ -131,6 +131,7 @@ \alias{StatBindot} \alias{StatBinhex} \alias{StatBoxplot} +\alias{StatChain} \alias{StatContour} \alias{StatContourFilled} \alias{StatCount} diff --git a/man/link_stat.Rd b/man/link_stat.Rd new file mode 100644 index 0000000000..77a3d33009 --- /dev/null +++ b/man/link_stat.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stat-chain.R +\name{link_stat} +\alias{link_stat} +\title{Parameterise a statistic computation} +\usage{ +link_stat(stat, ..., after.stat = aes()) +} +\arguments{ +\item{stat}{The statistical transformation to use on the data. The \code{stat} +argument accepts the following: +\itemize{ +\item A \code{Stat} ggproto subclass, for example \code{StatCount}. +\item A string naming the stat. To give the stat as a string, strip the +function name of the \code{stat_} prefix. For example, for \code{stat_count()}, give +the string \code{"count"}. +}} + +\item{...}{Other arguments passed to the stat as a parameter.} + +\item{after.stat}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}} to be +evaluated only after the stat has been computed.} +} +\value{ +A list bundling the stat, parameters and mapping. +} +\description{ +This is a helper function for \code{\link[=stat_chain]{stat_chain()}} to pass parameters and declare +mappings. +} +\examples{ +# See `?stat_chain` +} +\seealso{ +\code{\link[=stat_chain]{stat_chain()}} +} +\keyword{internal} diff --git a/man/stat_chain.Rd b/man/stat_chain.Rd new file mode 100644 index 0000000000..a0917bda3b --- /dev/null +++ b/man/stat_chain.Rd @@ -0,0 +1,162 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stat-chain.R +\name{stat_chain} +\alias{stat_chain} +\title{Chain statistic computation} +\usage{ +stat_chain( + mapping = NULL, + data = NULL, + geom = "path", + position = "identity", + ..., + stats = "identity", + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE +) +} +\arguments{ +\item{mapping}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}}. If specified and +\code{inherit.aes = TRUE} (the default), it is combined with the default mapping +at the top level of the plot. You must supply \code{mapping} if there is no plot +mapping.} + +\item{data}{The data to be displayed in this layer. There are three +options: + +If \code{NULL}, the default, the data is inherited from the plot +data as specified in the call to \code{\link[=ggplot]{ggplot()}}. + +A \code{data.frame}, or other object, will override the plot +data. All objects will be fortified to produce a data frame. See +\code{\link[=fortify]{fortify()}} for which variables will be created. + +A \code{function} will be called with a single argument, +the plot data. The return value must be a \code{data.frame}, and +will be used as the layer data. A \code{function} can be created +from a \code{formula} (e.g. \code{~ head(.x, 10)}).} + +\item{geom}{The geometric object to use to display the data for this layer. +When using a \verb{stat_*()} function to construct a layer, the \code{geom} argument +can be used to override the default coupling between stats and geoms. The +\code{geom} argument accepts the following: +\itemize{ +\item A \code{Geom} ggproto subclass, for example \code{GeomPoint}. +\item A string naming the geom. To give the geom as a string, strip the +function name of the \code{geom_} prefix. For example, to use \code{geom_point()}, +give the geom as \code{"point"}. +\item For more information and other ways to specify the geom, see the +\link[=layer_geoms]{layer geom} documentation. +}} + +\item{position}{A position adjustment to use on the data for this layer. This +can be used in various ways, including to prevent overplotting and +improving the display. The \code{position} argument accepts the following: +\itemize{ +\item The result of calling a position function, such as \code{position_jitter()}. +This method allows for passing extra arguments to the position. +\item A string naming the position adjustment. To give the position as a +string, strip the function name of the \code{position_} prefix. For example, +to use \code{position_jitter()}, give the position as \code{"jitter"}. +\item For more information and other ways to specify the position, see the +\link[=layer_positions]{layer position} documentation. +}} + +\item{...}{Other arguments passed on to \code{\link[=layer]{layer()}}'s \code{params} argument. These +arguments broadly fall into one of 4 categories below. Notably, further +arguments to the \code{position} argument, or aesthetics that are required +can \emph{not} be passed through \code{...}. Unknown arguments that are not part +of the 4 categories below are ignored. +\itemize{ +\item Static aesthetics that are not mapped to a scale, but are at a fixed +value and apply to the layer as a whole. For example, \code{colour = "red"} +or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} +section that lists the available options. The 'required' aesthetics +cannot be passed on to the \code{params}. Please note that while passing +unmapped aesthetics as vectors is technically possible, the order and +required length is not guaranteed to be parallel to the input data. +\item When constructing a layer using +a \verb{stat_*()} function, the \code{...} argument can be used to pass on +parameters to the \code{geom} part of the layer. An example of this is +\code{stat_density(geom = "area", outline.type = "both")}. The geom's +documentation lists which parameters it can accept. +\item Inversely, when constructing a layer using a +\verb{geom_*()} function, the \code{...} argument can be used to pass on parameters +to the \code{stat} part of the layer. An example of this is +\code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation +lists which parameters it can accept. +\item The \code{key_glyph} argument of \code{\link[=layer]{layer()}} may also be passed on through +\code{...}. This can be one of the functions described as +\link[=draw_key]{key glyphs}, to change the display of the layer in the legend. +}} + +\item{stats}{A character vector or list of statistical transformations to use +for this layer. Every element needs to be one of the following: +\itemize{ +\item A \code{Stat} ggproto subclass, for example \code{StatCount} +\item A string naming the stat. To give the stat as a string, strip the +function name of the \code{stat_} prefix. For example, to use \code{stat_count()}, +give the stat as \code{"count"}. +\item The result of \code{\link[=link_stat]{link_stat()}} to pass parameters or mapping instructions. +}} + +\item{na.rm}{If \code{FALSE}, the default, missing values are removed with +a warning. If \code{TRUE}, missing values are silently removed.} + +\item{show.legend}{logical. Should this layer be included in the legends? +\code{NA}, the default, includes if any aesthetics are mapped. +\code{FALSE} never includes, and \code{TRUE} always includes. +It can also be a named logical vector to finely select the aesthetics to +display. To include legend keys for all levels, even +when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, +but unobserved levels are omitted.} + +\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, +rather than combining with them. This is most useful for helper functions +that define both data and aesthetics and shouldn't inherit behaviour from +the default plot specification, e.g. \code{\link[=borders]{borders()}}.} +} +\description{ +This statistic layer can take multiple stats and chain these together +to transform the data in a series of computations. +} +\details{ +The procedure in which stats are chained are as follows. First, the +layer-level, undelayed aesthetics in the \code{mapping} argument are evaluated. +The data that results from that evaluation is passed to the first stat in +the \code{stats} argument to perform that stat's computation. If that first stat +is a \code{\link{link_stat}} with an \code{after.stat} component, the \code{after.stat} component +is evaluated before passing on the data to the next stat in the \code{stats} +argument. The next components in the \code{stats} argument work the same: the +data is passed on to compute the stat, then \code{after.stat} is evaluated. In +essence, the \code{after.stat} allows control over how computed variables are +passed to the next stat in the chain. Finally, once all components in the +\code{stats} arguments have been handled, the staged after stat components of +the layer-level \code{mapping} is evaluated. Per usual, the data are then handled +by the position and geom parts of a layer. +} +\examples{ +p <- ggplot(mpg, aes(displ, colour = drv)) +# Binning unique observations +p + stat_chain(stats = c("unique", "bin")) +# Controlling parameters +p + stat_chain( + stats = list("unique", link_stat("bin", bins = 10)) +) +# Evaluate expressions after computing stats +p + stat_chain(stats = list( + link_stat("unique", after.stat = aes(x = x + 1)), + link_stat("density", after.stat = aes(y = density)) +)) +# Note that the last `after.stat` argument serves the same role as the +# `after_stat()` function in the layer mapping, so the following is +# equivalent to the previous plot +p + stat_chain( + mapping = aes(y = after_stat(density)), + stats = list(link_stat("unique", after.stat = aes(x = x + 1)), "density") +) +} +\seealso{ +\code{\link[=link_stat]{link_stat()}} +} diff --git a/tests/testthat/test-stat-chain.R b/tests/testthat/test-stat-chain.R new file mode 100644 index 0000000000..2d3fd18367 --- /dev/null +++ b/tests/testthat/test-stat-chain.R @@ -0,0 +1,28 @@ +test_that("stat_chain can chain multiple stats", { + + df <- data.frame(x = c(1, 1.9, 2.1, 3, 3, 3)) + + p <- ggplot(df, aes(x)) + + stat_chain( + stats = list(link_stat("bin", breaks = 0.5:3.5)) + ) + + stat_chain( + stats = list("unique", link_stat("bin", breaks = 0.5:3.5)), + ) + + stat_chain( + stats = list("unique", link_stat("bin", breaks = 0.5:3.5, after.stat = aes(y = -count))) + ) + p <- ggplot_build(p) + + ld <- get_layer_data(p, 1L) + expect_equal(ld$x, 1:3) + expect_equal(ld$y, 1:3) + + ld <- get_layer_data(p, 2L) + expect_equal(ld$x, 1:3) + expect_equal(ld$y, c(1, 2, 1)) + + ld <- get_layer_data(p, 3L) + expect_equal(ld$x, 1:3) + expect_equal(ld$y, c(-1, -2, -1)) +})