ModelOriented
diff --git a/‎NAMESPACE‎
Lines changed: 4 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎R/kernelshap.R‎
Lines changed: 10 additions & 23 deletions b/‎R/kernelshap.R‎
Lines changed: 10 additions & 23 deletions
diff --git a/‎R/permshap.R‎
Lines changed: 171 additions & 0 deletions b/‎R/permshap.R‎
Lines changed: 171 additions & 0 deletions
diff --git a/‎R/utils.R‎
Lines changed: 101 additions & 1 deletion b/‎R/utils.R‎
Lines changed: 101 additions & 1 deletion
@@ -3,8 +3,12 @@
 S3method(kernelshap,Learner)
 S3method(kernelshap,default)
 S3method(kernelshap,ranger)
+S3method(permshap,Learner)
+S3method(permshap,default)
+S3method(permshap,ranger)
 S3method(print,kernelshap)
 S3method(summary,kernelshap)
 export(is.kernelshap)
 export(kernelshap)
+export(permshap)
 importFrom(foreach,"%dopar%")
@@ -68,7 +68,7 @@
 #'   In cases with a natural "off" value (like MNIST digits), 
 #'   this can also be a single row with all values set to the off value.
 #' @param pred_fun Prediction function of the form `function(object, X, ...)`,
-#'   providing \eqn{K \ge 1} numeric predictions per row. Its first argument 
+#'   providing \eqn{K \ge 1} predictions per row. Its first argument 
 #'   represents the model `object`, its second argument a data structure like `X`. 
 #'   Additional (named) arguments are passed via `...`. 
 #'   The default, [stats::predict()], will work in most cases. 
@@ -113,7 +113,7 @@
 #' @param max_iter If the stopping criterion (see `tol`) is not reached after 
 #'   `max_iter` iterations, the algorithm stops. Ignored if `exact = TRUE`.
 #' @param parallel If `TRUE`, use parallel [foreach::foreach()] to loop over rows
-#'   to be explained. Must register backend beforehand, e.g., via {doFuture} package, 
+#'   to be explained. Must register backend beforehand, e.g., via 'doFuture' package, 
 #'   see README for an example. Parallelization automatically disables the progress bar.
 #' @param parallel_args Named list of arguments passed to [foreach::foreach()]. 
 #'   Ideally, this is `NULL` (default). Only relevant if `parallel = TRUE`. 
@@ -191,31 +191,19 @@ kernelshap.default <- function(object, X, bg_X, pred_fun = stats::predict,
                                m = 2L * length(feature_names) * (1L + 3L * (hybrid_degree == 0L)), 
                                tol = 0.005, max_iter = 100L, parallel = FALSE, 
                                parallel_args = NULL, verbose = TRUE, ...) {
+  basic_checks(X = X, bg_X = bg_X, feature_names = feature_names, pred_fun = pred_fun)
+  p <- length(feature_names)
   stopifnot(
-    is.matrix(X) || is.data.frame(X),
-    is.matrix(bg_X) || is.data.frame(bg_X),
-    is.matrix(X) == is.matrix(bg_X),
-    dim(X) >= 1L,
-    dim(bg_X) >= 1L,
-    !is.null(colnames(X)),
-    !is.null(colnames(bg_X)),
-    (p <- length(feature_names)) >= 1L,
-    all(feature_names %in% colnames(X)),
-    all(feature_names %in% colnames(bg_X)),  # not necessary, but clearer
-    all(colnames(X) %in% colnames(bg_X)),
-    is.function(pred_fun),
     exact %in% c(TRUE, FALSE),
     p == 1L || exact || hybrid_degree %in% 0:(p / 2),
     paired_sampling %in% c(TRUE, FALSE),
     "m must be even" = trunc(m / 2) == m / 2
   )
+  p <- length(feature_names)
   n <- nrow(X)
   bg_n <- nrow(bg_X)
   if (!is.null(bg_w)) {
-    stopifnot(length(bg_w) == bg_n, all(bg_w >= 0), !all(bg_w == 0))
-    if (!is.double(bg_w)) {
-      bg_w <- as.double(bg_w)
-    }
+    bg_w <- prep_w(bg_w, bg_n = bg_n)
   }
   if (is.matrix(X) && !identical(colnames(X), feature_names)) {
     stop("If X is a matrix, feature_names must equal colnames(X)")  
@@ -224,15 +212,14 @@ kernelshap.default <- function(object, X, bg_X, pred_fun = stats::predict,
   # Calculate v1 and v0
   v1 <- align_pred(pred_fun(object, X, ...))         # Predictions on X:        n x K
   bg_preds <- align_pred(pred_fun(object, bg_X[, colnames(X), drop = FALSE], ...))
-  v0 <- weighted_colMeans(bg_preds, bg_w)            # Average pred of bg data: 1 x K
+  v0 <- wcolMeans(bg_preds, bg_w)                    # Average pred of bg data: 1 x K
 
   # For p = 1, exact Shapley values are returned
   if (p == 1L) {
-    return(
-      case_p1(
-        n = n, feature_names = feature_names, v0 = v0, v1 = v1, X = X, verbose = verbose
-      )
+    out <- case_p1(
+      n = n, feature_names = feature_names, v0 = v0, v1 = v1, X = X, verbose = verbose
     )
+    return(out)
   }
 
   # Drop unnecessary columns in bg_X. If X is matrix, also column order is relevant
 
@@ -0,0 +1,171 @@
+#' Permutation SHAP
+#'
+#' Exact permutation SHAP values with respect to a background dataset (p <= 14).
+#'
+#' @inheritParams kernelshap
+#' @returns
+#'   An object of class "permshap" with the following components:
+#'   - `S`: \eqn{(n \times p)} matrix with SHAP values or, if the model output has
+#'     dimension \eqn{K > 1}, a list of \eqn{K} such matrices.
+#'   - `X`: Same as input argument `X`.
+#'   - `baseline`: Vector of length \eqn{K} representing the average prediction on
+#'     the background data.
+#' @export
+#' @examples
+#' # MODEL ONE: Linear regression
+#' fit <- lm(Sepal.Length ~ ., data = iris)
+#'
+#' # Select rows to explain (only feature columns)
+#' X_explain <- iris[1:2, -1]
+#'
+#' # Select small background dataset (could use all rows here because iris is small)
+#' set.seed(1)
+#' bg_X <- iris[sample(nrow(iris), 100), ]
+#'
+#' # Calculate SHAP values
+#' s <- permshap(fit, X_explain, bg_X = bg_X)
+#' s
+#'
+#' # MODEL TWO: Multi-response linear regression
+#' fit <- lm(as.matrix(iris[1:2]) ~ Petal.Length + Petal.Width + Species, data = iris)
+#' s <- permshap(fit, iris[1:4, 3:5], bg_X = bg_X)
+#' s
+#'
+#' # Non-feature columns can be dropped via 'feature_names'
+#' s <- permshap(
+#'   fit,
+#'   iris[1:4, ],
+#'   bg_X = bg_X,
+#'   feature_names = c("Petal.Length", "Petal.Width", "Species")
+#' )
+#' s
+permshap <- function(object, ...) {
+  UseMethod("permshap")  # S3 generic: dispatches on the class of 'object'
+}
+
+#' @describeIn permshap Default permutation SHAP method (exact, at most 14 features).
+#' @export
+permshap.default <- function(object, X, bg_X, pred_fun = stats::predict,
+                             feature_names = colnames(X), bg_w = NULL,
+                             parallel = FALSE, parallel_args = NULL,
+                             verbose = TRUE, ...) {
+  basic_checks(X = X, bg_X = bg_X, feature_names = feature_names, pred_fun = pred_fun)
+  p <- length(feature_names)
+  stopifnot(p <= 14L)  # all 2^p on-off vectors are evaluated, so p must stay small
+  n <- nrow(X)
+  bg_n <- nrow(bg_X)
+  if (!is.null(bg_w)) {
+    bg_w <- prep_w(bg_w, bg_n = bg_n)  # validated weights, coerced to double
+  }
+  if (is.matrix(X) && !identical(colnames(X), feature_names)) {
+    stop("If X is a matrix, feature_names must equal colnames(X)")
+  }
+  
+  if (verbose) {
+    message("Exact permutation SHAP values")
+  }
+  
+  # Baseline: predictions on the background data restricted to the columns of X
+  bg_preds <- align_pred(pred_fun(object, bg_X[, colnames(X), drop = FALSE], ...))
+  v0 <- wcolMeans(bg_preds, bg_w)            # Average pred of bg data: 1 x K
+  
+  # Drop unnecessary columns in bg_X. If X is matrix, also column order is relevant
+  # Predictions will never be applied directly to bg_X anymore
+  if (!identical(colnames(bg_X), feature_names)) {
+    bg_X <- bg_X[, feature_names, drop = FALSE]
+  }
+  
+  # Precalculations that are identical for each row to be explained
+  Z <- exact_Z(p, feature_names = feature_names, keep_extremes = TRUE)  # 2^p x p
+  m_exact <- nrow(Z)
+  precalc <- list(
+    Z = Z,
+    Z_code = rowpaste(Z),  # presumably one string code per row of Z - see rowpaste()
+    bg_X_rep = bg_X[rep(seq_len(bg_n), times = m_exact), , drop = FALSE]  # bg_X stacked
+  )
+  
+  if (m_exact * bg_n > 2e5) {
+    warning("\nPredictions on large data sets with ", m_exact, "x", bg_n,
+            " observations are being done.\n",
+            "Consider reducing the computational burden (e.g. use smaller bg_X)")
+  }
+  
+  # Apply permutation SHAP to each row of X
+  if (isTRUE(parallel)) {
+    parallel_args <- c(list(i = seq_len(n)), parallel_args)  # 'i' iterates over rows
+    res <- do.call(foreach::foreach, parallel_args) %dopar% permshap_one(
+      x = X[i, , drop = FALSE],
+      object = object,
+      pred_fun = pred_fun,
+      bg_w = bg_w,
+      precalc = precalc,
+      ...
+    )
+  } else {
+    if (verbose && n >= 2L) {
+      pb <- utils::txtProgressBar(max = n, style = 3)  # only shown for 2+ rows
+    }
+    res <- vector("list", n)
+    for (i in seq_len(n)) {
+      res[[i]] <- permshap_one(
+        x = X[i, , drop = FALSE],
+        object = object,
+        pred_fun = pred_fun,
+        bg_w = bg_w,
+        precalc = precalc,
+        ...
+      )
+      if (verbose && n >= 2L) {
+        utils::setTxtProgressBar(pb, i)
+      }
+    }
+  }
+  out <- list(S = reorganize_list(res), X = X, baseline = as.vector(v0))
+  class(out) <- "permshap"
+  out
+}
+
+#' @describeIn permshap Permutation SHAP method for "ranger" models, see README for an example.
+#' @export
+permshap.ranger <- function(object, X, bg_X,
+                            pred_fun = function(m, X, ...) stats::predict(m, X, ...)$predictions,
+                            feature_names = colnames(X),
+                            bg_w = NULL, parallel = FALSE, parallel_args = NULL,
+                            verbose = TRUE, ...) {
+  permshap.default(  # pure delegation; only the default 'pred_fun' differs
+    object = object,
+    X = X,
+    bg_X = bg_X,
+    pred_fun = pred_fun,
+    feature_names = feature_names,
+    bg_w = bg_w,
+    parallel = parallel,
+    parallel_args = parallel_args,
+    verbose = verbose,
+    ...
+  )
+}
+
+#' @describeIn permshap Permutation SHAP method for "mlr3" models, see README for an example.
+#' @export
+permshap.Learner <- function(object, X, bg_X,
+                             pred_fun = NULL,
+                             feature_names = colnames(X),
+                             bg_w = NULL, parallel = FALSE, parallel_args = NULL,
+                             verbose = TRUE, ...) {
+  if (is.null(pred_fun)) {  # no user-supplied function: derive one from the learner
+    pred_fun <- mlr3_pred_fun(object, X = X)
+  }
+  permshap.default(  # everything else is handled by the default method
+    object = object,
+    X = X,
+    bg_X = bg_X,
+    pred_fun = pred_fun,
+    feature_names = feature_names,
+    bg_w = bg_w,
+    parallel = parallel,
+    parallel_args = parallel_args,
+    verbose = verbose,
+    ...
+  )
+}
@@ -8,7 +8,7 @@
 #' @param x A matrix-like object.
 #' @param w Optional case weights.
 #' @returns A (1 x ncol(x)) matrix of column means.
-weighted_colMeans <- function(x, w = NULL, ...) {
+wcolMeans <- function(x, w = NULL, ...) {
   if (NCOL(x) == 1L && is.null(w)) {
     return(as.matrix(mean(x)))
   }
@@ -18,6 +18,66 @@ weighted_colMeans <- function(x, w = NULL, ...) {
   rbind(if (is.null(w)) colMeans(x) else colSums(x * w) / sum(w))
 }
 
+#' All on-off Vectors
+#'
+#' Internal function that creates the matrix of all on-off vectors of length `p`.
+#'
+#' @noRd
+#' @keywords internal
+#'
+#' @param p Number of features.
+#' @param feature_names Feature names, used as column names of the result.
+#' @param keep_extremes Keep the all-off and all-on rows? Defaults to `FALSE` (for kernelshap).
+#' @returns Integer matrix with 2^p rows; without extremes 2^p - 2 rows (none if `p = 1`).
+exact_Z <- function(p, feature_names, keep_extremes = FALSE) {
+  Z <- as.matrix(do.call(expand.grid, replicate(p, 0:1, simplify = FALSE)))
+  colnames(Z) <- feature_names
+  if (keep_extremes) Z else Z[-c(1L, nrow(Z)), , drop = FALSE]  # 2:(n - 1) breaks if p = 1
+}
+
+#' Masker
+#'
+#' Internal function.
+#' For each on-off vector (rows in `Z`), the (weighted) average prediction is returned.
+#'
+#' @noRd
+#' @keywords internal
+#'
+#' @inheritParams kernelshap
+#' @param X Row to be explained stacked m*n_bg times.
+#' @param bg Background data stacked m times.
+#' @param Z A (m x p) matrix with on-off values.
+#' @param w A vector with case weights (of the same length as the unstacked
+#'   background data), or `NULL` for unweighted averages.
+#' @returns A (m x K) matrix with vz values, one row per row of `Z`.
+get_vz <- function(X, bg, Z, object, pred_fun, w, ...) {
+  m <- nrow(Z)
+  not_Z <- !Z  # TRUE where a feature is "off", i.e., its value is taken from bg
+  n_bg <- nrow(bg) / m   # because bg was replicated m times
+  
+  # Replicate not_Z, so that X, bg, not_Z are all of dimension (m*n_bg x p)
+  g <- rep_each(m, each = n_bg)  # presumably rep(seq_len(m), each = n_bg) - confirm
+  not_Z <- not_Z[g, , drop = FALSE]
+  
+  if (is.matrix(X)) {
+    # Remember that columns of X and bg are perfectly aligned in this case
+    X[not_Z] <- bg[not_Z]
+  } else {
+    for (v in colnames(Z)) {  # data.frame case: mask column by column
+      s <- not_Z[, v]
+      X[[v]][s] <- bg[[v]][s]
+    }
+  }
+  preds <- align_pred(pred_fun(object, X, ...))
+  
+  # Aggregate: (weighted) mean of the predictions within each group in g
+  if (is.null(w)) {
+    return(rowsum(preds, group = g, reorder = FALSE) / n_bg)
+  }
+  # w is recycled over rows and columns
+  rowsum(preds * w, group = g, reorder = FALSE) / sum(w)
+}
+
 #' Combine Matrices
 #'
 #' Binds list of matrices along new first axis.
@@ -183,6 +243,46 @@ fdummy <- function(x) {
   out 
 }
 
+#' Basic Input Checks
+#' 
+#' @noRd
+#' @keywords internal
+#' 
+#' @inheritParams kernelshap
+#' 
+#' @returns `TRUE` if all checks pass; otherwise [stopifnot()] raises an error.
+basic_checks <- function(X, bg_X, feature_names, pred_fun) {
+  stopifnot(
+    is.matrix(X) || is.data.frame(X),
+    is.matrix(bg_X) || is.data.frame(bg_X),
+    is.matrix(X) == is.matrix(bg_X),  # both matrices or both data.frames
+    dim(X) >= 1L,  # at least one row and one column
+    dim(bg_X) >= 1L,
+    !is.null(colnames(X)),
+    !is.null(colnames(bg_X)),
+    length(feature_names) >= 1L,
+    all(feature_names %in% colnames(X)),
+    all(feature_names %in% colnames(bg_X)),  # not necessary, but clearer
+    all(colnames(X) %in% colnames(bg_X)),  # bg_X must contain every column of X
+    is.function(pred_fun)
+  )
+  TRUE
+}
+
+#' Prepare Case Weights
+#' 
+#' @noRd
+#' @keywords internal
+#' 
+#' @param w Vector of case weights.
+#' @param bg_n Number of rows in the background data.
+#' 
+#' @returns The weights `w`, converted to double if necessary, or an error.
+prep_w <- function(w, bg_n) {
+  stopifnot(length(w) == bg_n, all(w >= 0), !all(w == 0))  # length, sign, not all 0
+  if (!is.double(w)) as.double(w) else w
+}
+
 #' mlr3 Helper
 #' 
 #' Returns the prediction function of a mlr3 Learner.