Skip to content

Commit 680b7ae

Browse files
authored
Merge pull request #318 from stan-dev/discrete_PIT_in_ppc_pit_ecdf
Limit maximum number of evaluation points in ppc_pit_ecdf functions by default to 1000.
2 parents c97cd58 + 5bdbccc commit 680b7ae

File tree

9 files changed

+118
-111
lines changed

9 files changed

+118
-111
lines changed

R/helpers-ppc.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ adjust_gamma <- function(N,
319319
abort("Value of 'prob' must be in (0,1).")
320320
}
321321
if (is.null(interpolate_adj)) {
322-
if (K <= 200) {
322+
if (K <= 200 || N < 100) {
323323
interpolate_adj <- FALSE
324324
} else {
325325
interpolate_adj <- TRUE

R/ppc-distributions.R

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,11 @@
4949
#' both, depending on the `y_draw` argument.
5050
#' }
5151
#' \item{`ppc_pit_ecdf()`, `ppc_pit_ecdf_grouped()`}{
52-
#' The ECDF of the empirical PIT values of `y` computed with respect to the
53-
#' corresponding `yrep` values. `100 * prob`% central simultaneous confidence
54-
#' intervals are provided to asses if `y` and `yrep` originate from the same
55-
#' distribution. The PIT values can also be provided directly as `pit`.
52+
#' The PIT-ECDF of the empirical PIT values of `y` computed with respect to
53+
#' the corresponding `yrep` values. `100 * prob`% central simultaneous
54+
#' confidence intervals are provided to assess if `y` and `yrep` originate
55+
#' from the same distribution. The PIT values can also be provided directly
56+
#' as `pit`.
5657
#' See Säilynoja et al. (2021) for more details.}
5758
#' }
5859
#'
@@ -73,8 +74,8 @@
7374
#' # ppc_ecdf_overlay with continuous data (set discrete=TRUE if discrete data)
7475
#' ppc_ecdf_overlay(y, yrep[sample(nrow(yrep), 25), ])
7576
#'
76-
#' # ECDF and ECDF difference plot of the PIT values of y compared to yrep
77-
#' # with 99% simultaneous confidence bands.
77+
#' # PIT-ECDF and PIT-ECDF difference plot of the PIT values of y compared to
78+
#' # yrep with 99% simultaneous confidence bands.
7879
#' ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = FALSE)
7980
#' ppc_pit_ecdf(y, yrep, prob = 0.99, plot_diff = TRUE)
8081
#' }
@@ -107,9 +108,9 @@
107108
#' ppc_ecdf_overlay_grouped(y, yrep[1:25, ], group = group)
108109
#'
109110
#' \donttest{
110-
#' # ECDF difference plots of the PIT values by group
111+
#' # PIT-ECDF plots of the PIT values by group
111112
#' # with 99% simultaneous confidence bands.
112-
#' ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99, plot_diff = TRUE)
113+
#' ppc_pit_ecdf_grouped(y, yrep, group=group, prob=0.99)
113114
#' }
114115
#'
115116
#' \donttest{
@@ -612,7 +613,7 @@ ppc_pit_ecdf <- function(y,
612613
) %>%
613614
unlist()
614615
if (is.null(K)) {
615-
K <- nrow(yrep) + 1
616+
K <- min(nrow(yrep) + 1, 1000)
616617
}
617618
} else {
618619
inform("'pit' specified so ignoring 'y', and 'yrep' if specified.")
@@ -631,7 +632,7 @@ ppc_pit_ecdf <- function(y,
631632
lims <- ecdf_intervals(gamma = gamma, N = N, K = K)
632633
ggplot() +
633634
aes(
634-
x = 1:K / K,
635+
x = seq(0,1,length.out = K),
635636
y = ecdf(pit)(seq(0, 1, length.out = K)) -
636637
(plot_diff == TRUE) * seq(0, 1, length.out = K),
637638
color = "y"
@@ -679,7 +680,7 @@ ppc_pit_ecdf_grouped <-
679680
) %>%
680681
unlist()
681682
if (is.null(K)) {
682-
K <- nrow(yrep) + 1
683+
K <- min(nrow(yrep) + 1, 1000)
683684
}
684685
} else {
685686
inform("'pit' specified so ignoring 'y' and 'yrep' if specified.")
@@ -691,7 +692,7 @@ ppc_pit_ecdf_grouped <-
691692
N_g <- sum(group == g)
692693
adjust_gamma(
693694
N = N_g,
694-
K = min(N_g, K),
695+
K = ifelse(is.null(K), N_g, K),
695696
prob = prob,
696697
interpolate_adj = interpolate_adj
697698
)
@@ -700,21 +701,23 @@ ppc_pit_ecdf_grouped <-
700701

701702
data <- data.frame(pit = pit, group = group) %>%
702703
group_by(group) %>%
703-
dplyr::group_map(~ data.frame(
704-
ecdf_value = ecdf(.x$pit)(seq(0, 1, length.out = min(nrow(.x), K))),
705-
group = .y[1],
706-
lims_upper = ecdf_intervals(
707-
gamma = gammas[[unlist(.y[1])]],
708-
N = nrow(.x),
709-
K = min(nrow(.x), K)
710-
)$upper[-1] / nrow(.x),
711-
lims_lower = ecdf_intervals(
712-
gamma = gammas[[unlist(.y[1])]],
713-
N = nrow(.x),
714-
K = min(nrow(.x), K)
715-
)$lower[-1] / nrow(.x),
716-
x = seq(0, 1, length.out = min(nrow(.x), K))
717-
)) %>%
704+
dplyr::group_map(
705+
~ data.frame(
706+
ecdf_value = ecdf(.x$pit)(seq(0, 1, length.out = ifelse(is.null(K), nrow(.x), K))),
707+
group = .y[1],
708+
lims_upper = ecdf_intervals(
709+
gamma = gammas[[unlist(.y[1])]],
710+
N = nrow(.x),
711+
K = ifelse(is.null(K), nrow(.x), K)
712+
)$upper[-1] / nrow(.x),
713+
lims_lower = ecdf_intervals(
714+
gamma = gammas[[unlist(.y[1])]],
715+
N = nrow(.x),
716+
K = ifelse(is.null(K), nrow(.x), K)
717+
)$lower[-1] / nrow(.x),
718+
x = seq(0, 1, length.out = ifelse(is.null(K), nrow(.x), K))
719+
)
720+
) %>%
718721
dplyr::bind_rows()
719722

720723
ggplot(data) +

man-roxygen/args-pit-ecdf.R

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
#' @param K An optional integer defining the number of equally spaced evaluation
2-
#' points for the ECDF. Reducing K when using `interpolate_adj = FALSE` makes
3-
#' computing the confidence bands faster. For `ppc_pit_ecdf` and
4-
#' `ppc_pit_ecdf_grouped`, defaults to `ncol(yrep) + 1`, or `length(pit)` if PIT
5-
#' values are supplied. For `mcmc_rank_ecdf` defaults to the number of
6-
#' iterations per chain in `x`.
2+
#' points for the PIT-ECDF. Reducing K when using `interpolate_adj = FALSE`
3+
#' makes computing the confidence bands faster. For `ppc_pit_ecdf` and
4+
#' `ppc_pit_ecdf_grouped`, if PIT values are supplied, defaults to
5+
#' `length(pit)`, otherwise `yrep` determines the maximum accuracy of the
6+
#' estimated PIT values and `K` is set to `min(nrow(yrep) + 1, 1000)`. For
7+
#' `mcmc_rank_ecdf`, defaults to the number of iterations per chain in `x`.
78
#' @param prob The desired simultaneous coverage level of the bands around the
89
#' ECDF. A value in (0,1).
910
#' @param plot_diff A boolean defining whether to plot the difference between
10-
#' the observed ECDF and the theoretical expectation for uniform PIT values
11-
#' rather than plotting the regular ECDF. The default is `FALSE`, but for
12-
#' large samples we recommend setting `plot_diff=TRUE` as the difference plot
13-
#' will visually show a more dynamic range.
11+
#' the observed PIT-ECDF and the theoretical expectation for uniform PIT
12+
#' values rather than plotting the regular ECDF. The default is `FALSE`, but
13+
#' for large samples we recommend setting `plot_diff=TRUE` as the difference
14+
#' plot will visually show a more dynamic range.
1415
#' @param interpolate_adj A boolean defining if the simultaneous confidence
1516
#' bands should be interpolated based on precomputed values rather than
1617
#' computed exactly. Computing the bands may be computationally intensive and

man/MCMC-traces.Rd

Lines changed: 6 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/PPC-distributions.Rd

Lines changed: 19 additions & 17 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-default.svg

Lines changed: 8 additions & 8 deletions
Loading

tests/testthat/_snaps/ppc-distributions/ppc-pit-ecdf-diff.svg

Lines changed: 8 additions & 8 deletions
Loading

0 commit comments

Comments
 (0)