@@ -98,6 +98,32 @@ validate_predictions <- function(predictions, n_obs = NULL) {
98
98
}
99
99
100
100
101
+ # ' Validate PIT
102
+ # '
103
+ # ' Checks that `pit` is numeric, doesn't have any NAs, and is either a vector,
104
+ # ' or a 1-D array with values in [0,1].
105
+ # '
106
+ # ' @param pit The 'pit' object from the user.
107
+ # ' @return Either throws an error or returns a numeric vector.
108
+ # ' @noRd
109
+ validate_pit <- function (pit ) {
110
+ stopifnot(is.numeric(pit ))
111
+
112
+ if (! is_vector_or_1Darray(pit )) {
113
+ abort(" 'pit' must be a vector or 1D array." )
114
+ }
115
+
116
+ if (any(pit > 1 ) || any(pit < 0 )) {
117
+ abort(" 'pit' must only contain values between 0 and 1." )
118
+ }
119
+
120
+ if (anyNA(pit )) {
121
+ abort(" NAs not allowed in 'pit'." )
122
+ }
123
+
124
+ unname(pit )
125
+ }
126
+
101
127
# ' Validate group
102
128
# '
103
129
# ' Checks that grouping variable has correct number of observations and is
@@ -267,6 +293,298 @@ melt_and_stack <- function(y, yrep) {
267
293
}
268
294
269
295
296
+ # ' Obtain the coverage parameter for simultaneous confidence bands for ECDFs
297
+ # '
298
+ # ' @param N Length of sample.
299
+ # ' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
300
+ # ' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
301
+ # ' @param prob Desired simultaneous coverage (0,1).
302
+ # ' @param M number of simulations to run, if simulation method is used.
303
+ # ' @param interpolate_adj Boolean defining whether to interpolate the confidence
304
+ # ' bands from precomputed values. Interpolation provides a faster plot with the
305
+ # ' trade-off of possible loss of accuracy.
306
+ # ' @return The adjusted coverage parameter yielding the desired simultaneous
307
+ # ' coverage of the ECDF traces.
308
+ # ' @noRd
309
+ adjust_gamma <- function (N ,
310
+ L = 1 ,
311
+ K = N ,
312
+ prob = 0.99 ,
313
+ M = 1000 ,
314
+ interpolate_adj = FALSE ) {
315
+ if (! all_counts(c(K , N , L ))) {
316
+ abort(" Parameters 'N', 'L' and 'K' must be positive integers." )
317
+ }
318
+ if (prob > = 1 || prob < = 0 ) {
319
+ abort(" Value of 'prob' must be in (0,1)." )
320
+ }
321
+ if (interpolate_adj == TRUE ) {
322
+ gamma <- interpolate_gamma(N = N , K = K , prob = prob , L = L )
323
+ } else if (L == 1 ) {
324
+ gamma <- adjust_gamma_optimize(N = N , K = K , prob = prob )
325
+ } else {
326
+ gamma <- adjust_gamma_simulate(N = N , L = L , K = K , prob = prob , M = M )
327
+ }
328
+ gamma
329
+ }
330
+
331
+ # ' Adjust coverage parameter for single sample using the optimization method.
332
+ # ' @param N Length of sample.
333
+ # ' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
334
+ # ' @param prob Desired simultaneous coverage (0,1).
335
+ # ' @return The adjusted coverage parameter yielding the desired simultaneous
336
+ # ' coverage of the ECDF traces.
337
+ # ' @noRd
338
+ adjust_gamma_optimize <- function (N , K , prob ) {
339
+ target <- function (gamma , prob , N , K ) {
340
+ z <- 1 : (K - 1 ) / K
341
+ z1 <- c(0 , z )
342
+ z2 <- c(z , 1 )
343
+
344
+ # pre-compute quantiles and use symmetry for increased efficiency.
345
+ x2_lower <- qbinom(gamma / 2 , N , z2 )
346
+ x2_upper <- c(N - rev(x2_lower )[2 : K ], 1 )
347
+
348
+ # Compute the total probability of trajectories inside the confidence
349
+ # intervals. Initialize the set and corresponding probabilities known
350
+ # to be 0 and 1 for the starting value z1 = 0.
351
+ x1 <- 0
352
+ p_int <- 1
353
+ for (i in seq_along(z1 )) {
354
+ p_int <- p_interior(
355
+ p_int = p_int ,
356
+ x1 = x1 ,
357
+ x2 = x2_lower [i ]: x2_upper [i ],
358
+ z1 = z1 [i ],
359
+ z2 = z2 [i ],
360
+ N = N
361
+ )
362
+ x1 <- x2_lower [i ]: x2_upper [i ]
363
+ }
364
+ return (abs(prob - sum(p_int )))
365
+ }
366
+ optimize(target , c(0 , 1 - prob ), prob = prob , N = N , K = K )$ minimum
367
+ }
368
+
369
+ # ' Adjust coverage parameter for multiple chains using the simulation method.
370
+ # ' In short, 'M' simulations of 'L' standard uniform chains are run and the
371
+ # ' confidence bands are set to cover 100 * 'prob' % of these simulations.
372
+ # ' @param N Length of sample.
373
+ # ' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
374
+ # ' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
375
+ # ' @param prob Desired simultaneous coverage (0,1).
376
+ # ' @param M number of simulations to run.
377
+ # ' @return The adjusted coverage parameter yielding the desired simultaneous
378
+ # ' coverage of the ECDF traces.
379
+ # ' @noRd
380
+ adjust_gamma_simulate <- function (N , L , K , prob , M ) {
381
+ gamma <- numeric (M )
382
+ z <- (1 : (K - 1 )) / K # Rank ECDF evaluation points.
383
+ n <- N * (L - 1 )
384
+ k <- floor(z * N * L )
385
+ for (m in seq_len(M )) {
386
+ u <- u_scale(replicate(L , runif(N ))) # Fractional ranks of sample chains
387
+ scaled_ecdfs <- apply(outer(u , z , " <=" ), c(2 , 3 ), sum )
388
+ # Find the smalles marginal probability of the simulation run
389
+ gamma [m ] <- 2 * min(
390
+ apply(
391
+ scaled_ecdfs , 1 , phyper , m = N , n = n , k = k
392
+ ),
393
+ apply(
394
+ scaled_ecdfs - 1 , 1 , phyper , m = N , n = n , k = k , lower.tail = FALSE
395
+ )
396
+ )
397
+ }
398
+ alpha_quantile(gamma , 1 - prob )
399
+ }
400
+
401
+ # ' Approximate the required adjustement to obtain simultaneous confidence bands
402
+ # ' of an ECDF plot with interpolation with regards to N and K from precomputed
403
+ # ' values for a fixed set of prob and L values.
404
+ # ' @param N Length of sample.
405
+ # ' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
406
+ # ' @param prob Desired simultaneous coverage (0,1).
407
+ # ' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
408
+ # ' @return The approximated adjusted coverage parameter yielding the desired
409
+ # ' simultaneous coverage of the ECDF traces.
410
+ # ' @noRd
411
+ interpolate_gamma <- function (N , K , prob , L ) {
412
+ # Find the precomputed values ueful for the interpolation task.
413
+ vals <- get_interpolation_values(N , K , L , prob )
414
+ # Largest lower bound and smalles upper bound for N among precomputed values.
415
+ N_lb <- max(vals [vals $ N < = N , ]$ N )
416
+ N_ub <- min(vals [vals $ N > = N , ]$ N )
417
+ # Approximate largest lower bound and smalles upper bound for gamma.
418
+ log_gamma_lb <- approx(
419
+ x = log(vals [vals $ N == N_lb , ]$ K ),
420
+ y = log(vals [vals $ N == N_lb , ]$ val ),
421
+ xout = log(K )
422
+ )$ y
423
+ log_gamma_ub <- approx(
424
+ x = log(vals [vals $ N == N_ub , ]$ K ),
425
+ y = log(vals [vals $ N == N_ub , ]$ val ),
426
+ xout = log(K )
427
+ )$ y
428
+ if (N_ub == N_lb ) {
429
+ log_gamma_approx <- log_gamma_lb
430
+ } else {
431
+ # Approximate log_gamma for the desired value of N.
432
+ log_gamma_approx <- approx(
433
+ x = log(c(N_lb , N_ub )),
434
+ y = c(log_gamma_lb , log_gamma_ub ),
435
+ xout = log(N )
436
+ )$ y
437
+ }
438
+ exp(log_gamma_approx )
439
+ }
440
+
441
+ # ' Filter the precomputed values useful for the interpolation task given to
442
+ # ' interpolate_gamma. Check, if the task is possible with the availabel data.
443
+ # ' @param N Length of sample.
444
+ # ' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
445
+ # ' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
446
+ # ' @param prob Desired simultaneous coverage (0,1).
447
+ # ' @return A data.frame containing the relevant precomputed values.
448
+ # ' @noRd
449
+ get_interpolation_values <- function (N , K , L , prob ) {
450
+ for (dim in c(" L" , " prob" )) {
451
+ if (all(get(dim ) != bayesplot ::: gamma_adj [, dim ])) {
452
+ stop(paste(
453
+ " No precomputed values to interpolate from for '" , dim , " ' = " ,
454
+ get(dim ),
455
+ " .\n " ,
456
+ " Values of '" , dim , " ' available for interpolation: " ,
457
+ paste(unique(bayesplot ::: gamma_adj [, dim ]), collapse = " , " ),
458
+ " ." ,
459
+ sep = " "
460
+ ))
461
+ }
462
+ }
463
+ vals <- bayesplot ::: gamma_adj [
464
+ bayesplot ::: gamma_adj $ L == L & bayesplot ::: gamma_adj $ prob == prob ,
465
+ ]
466
+ if (N > max(vals $ N )) {
467
+ stop(paste(
468
+ " No precomputed values to interpolate from for sample length of " ,
469
+ N ,
470
+ " .\n " ,
471
+ " Please use a subsample of length " ,
472
+ max(vals $ N ),
473
+ " or smaller, or consider setting 'interpolate_adj' = FALSE." ,
474
+ sep = " "
475
+ ))
476
+ }
477
+ if (N < min(vals $ N )) {
478
+ stop(paste(
479
+ " No precomputed values to interpolate from for sample length of " ,
480
+ N ,
481
+ " .\n " ,
482
+ " Please use a subsample of length " ,
483
+ min(vals $ N ),
484
+ " or larger, or consider setting 'interpolate_adj' = FALSE." ,
485
+ sep = " "
486
+ ))
487
+ }
488
+ if (K > max(vals [vals $ N < = N , ]$ K )) {
489
+ stop(paste(
490
+ " No precomputed values available for interpolation for 'K' = " ,
491
+ K ,
492
+ " .\n " ,
493
+ " Try either setting a value of 'K' <= " ,
494
+ max(vals [vals $ N < = N , ]$ K ),
495
+ " or 'interpolate_adj' = FALSE." ,
496
+ sep = " "
497
+ ))
498
+ }
499
+ if (K < min(vals [vals $ N < = N , ]$ K )) {
500
+ stop(paste(
501
+ " No precomputed values available for interpolation for 'K' = " ,
502
+ K ,
503
+ " .\n " ,
504
+ " Try either setting a value of 'K' >= " ,
505
+ min(vals [vals $ N < = N , ]$ K ),
506
+ " or 'interpolate_adj' = FALSE." ,
507
+ sep = " "
508
+ ))
509
+ }
510
+ vals
511
+ }
512
+
513
+ # ' A helper function for 'adjust_gamma_optimize' defining the probability that
514
+ # ' a scaled ECDF stays within the supplied bounds between two evaluation points.
515
+ # ' @param p_int For each value in x1, the probability that the ECDF has stayed
516
+ # ' within the bounds until z1 and takes the value in x1 at z1.
517
+ # ' @param x1 Vector of scaled ECDF values at the left end of the interval, z1.
518
+ # ' @param x2 Vector of scaled ECDF values at the right end of the interval, z2.
519
+ # ' @param z1 Left evaluation point in [0,1]
520
+ # ' @param z2 Right evaluation point in [0,1] with z2 > z1.
521
+ # ' @param N Total number of values in the sample.
522
+ # ' @return A vector containing the probability to transitioning from the values
523
+ # ' in x1 to each of the values in x2 weighted by the probabilities in p_int.
524
+ # ' @noRd
525
+ p_interior <- function (p_int , x1 , x2 , z1 , z2 , N ) {
526
+ # Ratio between the length of the evaluation interval and the total length of
527
+ # the interval left to cover by ECDF.
528
+ z_tilde <- (z2 - z1 ) / (1 - z1 )
529
+ # Number of samples left to cover by ECDF.
530
+ N_tilde <- rep(N - x1 , each = length(x2 ))
531
+
532
+ p_int <- rep(p_int , each = length(x2 ))
533
+ x_diff <- outer(x2 , x1 , " -" )
534
+ # Pobability of each transition from a value in x1 to a value in x2.
535
+ p_x2_int <- p_int * dbinom(x_diff , N_tilde , z_tilde )
536
+ rowSums(p_x2_int )
537
+ }
538
+
539
+ # ' A helper function for 'adjust_alpha_simulate'
540
+ # ' 100 * `alpha` percent of the trials in 'gamma' are allowed to be rejected.
541
+ # ' In case of ties, return the largest value dominating at most
542
+ # ' 100 * (alpha + tol) percent of the values.
543
+ # ' @noRd
544
+ alpha_quantile <- function (gamma , alpha , tol = 0.001 ) {
545
+ a <- unname(quantile(gamma , probs = alpha ))
546
+ a_tol <- unname(quantile(gamma , probs = alpha + tol ))
547
+ if (a == a_tol ) {
548
+ if (min(gamma ) < a ) {
549
+ # take the largest value that doesn't exceed the tolerance.
550
+ a <- max(gamma [gamma < a ])
551
+ }
552
+ }
553
+ a
554
+ }
555
+
556
+ # ' Compute simultaneous confidence intervals with the given adjusted coverage
557
+ # ' parameter gamma.
558
+ # ' @param gamma Adjusted coverage parameter for the marginal distribution
559
+ # ' (binomial for PIT values and hypergeometric for rank transformed chains).
560
+ # ' @param N Sample length.
561
+ # ' @param K Number of uniformly spaced evaluation points.
562
+ # ' @param L Number of MCMC-chains. (1 for ppc)
563
+ # ' @return A list with upper and lower confidence interval values at the
564
+ # ' evaluation points.
565
+ # ' @noRd
566
+ ecdf_intervals <- function (gamma , N , K , L = 1 ) {
567
+ lims <- list ()
568
+ z <- seq(0 , 1 , length.out = K + 1 )
569
+ if (L == 1 ) {
570
+ lims $ lower <- qbinom(gamma / 2 , N , z )
571
+ lims $ upper <- qbinom(1 - gamma / 2 , N , z )
572
+ } else {
573
+ n <- N * (L - 1 )
574
+ k <- floor(z * L * N )
575
+ lims $ lower <- qhyper(gamma / 2 , N , n , k )
576
+ lims $ upper <- qhyper(1 - gamma / 2 , N , n , k )
577
+ }
578
+ lims
579
+ }
580
+
581
+ # ' Helper for 'adjust_gamma_simulate`
582
+ # ' Transforms observations in 'x' into their corresponding fractional ranks.
583
+ # ' @noRd
584
+ u_scale <- function (x ) {
585
+ array (rank(x ) / length(x ), dim = dim(x ), dimnames = dimnames(x ))
586
+ }
587
+
270
588
# labels ----------------------------------------------------------------
271
589
create_rep_ids <- function (ids ) paste(' italic(y)[rep] (' , ids , " )" )
272
590
y_label <- function () expression(italic(y ))
0 commit comments