ModelOriented
diff --git a/‎DESCRIPTION‎
Lines changed: 1 addition & 1 deletion b/‎DESCRIPTION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎NEWS.md‎
Lines changed: 11 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎R/from_hstats.R‎
Lines changed: 42 additions & 0 deletions b/‎R/from_hstats.R‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎R/kernelshap.R‎
Lines changed: 3 additions & 0 deletions b/‎R/kernelshap.R‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎R/utils.R‎
Lines changed: 8 additions & 8 deletions b/‎R/utils.R‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎README.md‎
Lines changed: 37 additions & 2 deletions b/‎README.md‎
Lines changed: 37 additions & 2 deletions
@@ -1,6 +1,6 @@
 Package: kernelshap
 Title: Kernel SHAP
-Version: 0.3.8
+Version: 0.4.0
 Authors@R: c(
     person("Michael", "Mayer", , "mayermichael79@gmail.com", role = c("aut", "cre")),
     person("David", "Watson", , "david.s.watson11@gmail.com", role = "aut"),
 
@@ -1,3 +1,14 @@
+# kernelshap 0.4.0
+
+## Major changes
+
+- Factor-valued predictions are now supported. Each level is represented by its own dummy variable (one-hot encoding).
+
+## Other changes
+
+- Slight speed-up.
+- Integer-valued case weights are now turned into doubles to avoid integer overflow.
+
 # kernelshap 0.3.8
 
 ## API improvements
 
@@ -0,0 +1,42 @@
+# These functions were originally implemented in {hstats}.
+
+#' Fast Index Generation
+#' 
+#' For not too small m, much faster than `rep(seq_len(m), each = each)`.
+#' 
+#' @noRd
+#' @keywords internal
+#' 
+#' @param m Integer. Upper end of the sequence `1:m` whose values are repeated.
+#' @param each Integer. How many times should each value in `1:m` be repeated?
+#' @returns Integer vector of length `m * each`, like `rep(seq_len(m), each = each)`.
+#' @examples
+#' rep_each(10, 2)
+#' rep(1:10, each = 2)  # Ditto
+rep_each <- function(m, each) {
+  out <- .col(dim = c(each, m))  # (each x m) matrix; every entry is its column index
+  dim(out) <- NULL  # drop dims: column-major flattening gives 1,...,1,2,...,2,...
+  out 
+}
+
+#' Fast OHE
+#' 
+#' Turns vector/factor into its One-Hot-Encoding.
+#' Ingeniously written by Mathias Ambuehl.
+#' 
+#' Working with integers instead of doubles would be slightly faster, but at the price
+#' of potential integer overflows in subsequent calculations.
+#' 
+#' @noRd
+#' @keywords internal
+#' 
+#' @param x Vector or factor. Non-factors are coerced via `as.factor()`.
+#' @returns 0/1 double matrix: one row per element of `x`, one column (named) per level.
+fdummy <- function(x) {
+  x <- as.factor(x)
+  lev <- levels(x)
+  out <- matrix(0, nrow = length(x), ncol = length(lev))  # doubles on purpose, see above
+  out[cbind(seq_along(x), as.integer(x))] <- 1  # matrix indexing: cell (i, level code of x[i])
+  colnames(out) <- lev
+  out 
+}
@@ -213,6 +213,9 @@ kernelshap.default <- function(object, X, bg_X, pred_fun = stats::predict,
   bg_n <- nrow(bg_X)
   if (!is.null(bg_w)) {
     stopifnot(length(bg_w) == bg_n, all(bg_w >= 0), !all(bg_w == 0))
+    if (!is.double(bg_w)) {
+      bg_w <- as.double(bg_w)
+    }
   }
   if (is.matrix(X) && !identical(colnames(X), feature_names)) {
     stop("If X is a matrix, feature_names must equal colnames(X)")  
 
@@ -141,7 +141,7 @@ get_vz <- function(X, bg, Z, object, pred_fun, w, ...) {
   n_bg <- nrow(bg) / m   # because bg was replicated m times
 
   # Replicate not_Z, so that X, bg, not_Z are all of dimension (m*n_bg x p)
-  g <- rep(seq_len(m), each = n_bg)
+  g <- rep_each(m, each = n_bg)  # from_hstats.R
   not_Z <- not_Z[g, , drop = FALSE]
 
   if (is.matrix(X)) {
@@ -175,7 +175,7 @@ get_vz <- function(X, bg, Z, object, pred_fun, w, ...) {
 #' @returns A (1 x ncol(x)) matrix of column means.
 weighted_colMeans <- function(x, w = NULL, ...) {
   if (NCOL(x) == 1L && is.null(w)) {
-    return(matrix(mean(x)))
+    return(as.matrix(mean(x)))
   }
   if (!is.matrix(x)) {
     x <- as.matrix(x)
@@ -226,21 +226,21 @@ reorganize_list <- function(alist) {
 
 #' Aligns Predictions
 #'
-#' Turns predictions into matrix. Originally implemented in {hstats}.
+#' Turns predictions into matrix. Factors are one-hot-encoded via `fdummy()`.
 #'
 #' @noRd
 #' @keywords internal
 #'
 #' @param x Object representing model predictions.
 #' @returns Like `x`, but converted to matrix.
 align_pred <- function(x) {
-  if (!is.matrix(x)) {
-    x <- as.matrix(x)
+  if (is.data.frame(x) && ncol(x) == 1L) {  # unwrap so a factor column reaches the factor branch
+    x <- x[[1L]]
   }
-  if (!is.numeric(x)) {
-    stop("Predictions must be numeric")
+  if (is.factor(x)) {
+    return(fdummy(x))  # from_hstats.R; one dummy column per factor level
   }
-  x
+  if (is.matrix(x)) x else as.matrix(x)  # matrices pass through unchanged
 }
 
 #' Head of List Elements
 
@@ -26,6 +26,7 @@ If the training data is small, use the full training data. In cases with a natur
 **Remarks**
 
 - Multivariate predictions are handled at no additional computational cost.
+- Factor-valued predictions are automatically turned into one-hot-encoded columns.
 - By changing the defaults, the iterative pure sampling approach in [2] can be enforced.
 - Case weights are supported via the argument `bg_w`.
 
@@ -46,8 +47,8 @@ Let's model diamonds prices!
 ### Linear regression
 
 ```r
-library(ggplot2)
 library(kernelshap)
+library(ggplot2)
 library(shapviz)
 
 diamonds <- transform(
@@ -221,6 +222,40 @@ shap_gam
 # [2,] -0.5153642 -0.1080045  0.11967804 0.031341595
 ```
 
+## Multi-output models
+
+{kernelshap} supports multivariate predictions, such as:
+- probabilistic classification,
+- non-probabilistic classification (factor-valued responses are turned into dummies),
+- regression with multivariate response, and
+- predictions found by applying multiple regression models.
+
+### Classification
+
+We use {ranger} to fit a probabilistic and a non-probabilistic classification model.
+
+```r
+library(kernelshap)
+library(ranger)
+library(shapviz)
+
+# Probabilistic
+fit_prob <- ranger(Species ~ ., data = iris, num.trees = 20, probability = TRUE, seed = 1)
+ks_prob <- kernelshap(fit_prob, X = iris, bg_X = iris) |> 
+  shapviz()
+sv_importance(ks_prob)
+
+# Non-probabilistic: Predictions are factors
+fit_class <- ranger(Species ~ ., data = iris, num.trees = 20, seed = 1)
+ks_class <- kernelshap(fit_class, X = iris, bg_X = iris) |> 
+  shapviz()
+sv_importance(ks_class)
+```
+
+![](man/figures/README-prob-class.svg)
+
+![](man/figures/README-fact-class.svg)
+
 ## Meta-learning packages
 
 Here, we provide some working examples for "tidymodels", "caret", and "mlr3".
@@ -283,7 +318,7 @@ fit_lm$train(task_iris)
 s <- kernelshap(fit_lm, iris[-1], bg_X = iris)
 s
 
-# Probabilistic classification -> lrn(..., predict_type = "prob")
+# *Probabilistic* classification -> lrn(..., predict_type = "prob")
 task_iris <- TaskClassif$new(id = "class", backend = iris, target = "Species")
 fit_rf <- lrn("classif.ranger", predict_type = "prob", num.trees = 50)
 fit_rf$train(task_iris)