add univariate cox filter + test

bblodfon · bblodfon · commit c4cbb2e3c24f · 2023-10-27T15:58:36.000+02:00
diff --git a/R/FilterUnivariateCox.R b/R/FilterUnivariateCox.R
@@ -0,0 +1,108 @@
+#' @title Univariate Cox Survival Filter
+#'
+#' @name mlr_filters_univariatecox
+#'
+#' @description Calculates scores for assessing the relationship between
+#' individual features and the time-to-event outcome (right-censored survival
+#' data) using a univariate Cox proportional hazards model.
+#' The goal is to determine which features have a statistically significant
+#' association with the event of interest, typically in the context of clinical
+#' or biomedical research.
+#'
+#' This filter fits a [CoxPH][mlr3proba::LearnerSurvCoxPH()] learner using each
+#' feature independently and extracts the \eqn{p}-value that quantifies the
+#' significance of the feature's impact on survival. The filter value is
+#' `-log(p)` (natural logarithm) where `p` is the \eqn{p}-value. This
+#' transformation is necessary to ensure numerical stability for very small
+#' \eqn{p}-values. Also higher values denote more important features.
+#'
+#' For features that are represented by a single model coefficient (numeric,
+#' integer and two-level factor features) `p` is the \eqn{p}-value of the Wald
+#' test of that coefficient. Factors with more than two levels are expanded
+#' into several dummy coefficients, so for those `p` is the \eqn{p}-value of
+#' the joint Wald test of the fitted model.
+#'
+#' @family Filter
+#' @include Filter.R
+#' @template seealso_filter
+#' @export
+#' @examples
+#' if (requireNamespace("mlr3proba", quietly = TRUE)) {
+#'   task = tsk("rats")
+#'   filter = flt("univariatecox")
+#'   filter$calculate(task)
+#'   as.data.table(filter)
+#' }
+#'
+#' if (mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3proba"), quietly = TRUE)) {
+#'   library("mlr3pipelines")
+#'   task = tsk("rats")
+#'
+#'   # Note: `filter.cutoff` is chosen arbitrarily here and should be tuned.
+#'   # The significance level of `0.05` serves as a conventional threshold.
+#'   # The filter returns the `-log`-transformed scores so we transform
+#'   # the cutoff as well:
+#'   cutoff = -log(0.05) # ~2.99
+#'
+#'   graph =
+#'     po("filter", filter = flt("univariatecox"), filter.cutoff = cutoff) %>>%
+#'     po("learner", lrn("surv.coxph"))
+#'   learner = as_learner(graph)
+#'
+#'   learner$train(task)
+#'
+#'   # univariate cox filter scores
+#'   learner$model$surv.univariatecox$scores
+#'
+#'   # only two features had a score larger than the specified `cutoff` and
+#'   # were used to train the CoxPH model
+#'   learner$model$surv.coxph$train_task$feature_names
+#' }
+FilterUnivariateCox = R6Class("FilterUnivariateCox",
+  inherit = Filter,
+  public = list(
+    #' @description Create a FilterUnivariateCox object.
+    initialize = function() {
+      super$initialize(
+        id = "surv.univariatecox",
+        packages = c("mlr3proba"),
+        param_set = ps(),
+        feature_types = c("integer", "numeric", "factor"),
+        task_types = "surv",
+        label = "Univariate Cox Survival Score",
+        man = "mlr3filters::mlr_filters_univariatecox"
+      )
+    }
+  ),
+
+  private = list(
+    # Fits one CoxPH model per feature of `task` and returns a named numeric
+    # vector of `-log(p)` scores (names are the feature names). `nfeat` is
+    # not used: all features are always scored.
+    .calculate = function(task, nfeat) {
+      # work on a clone so the feature roles of the caller's task stay untouched
+      task = task$clone()
+      features = task$feature_names
+      learner = lrn("surv.coxph")
+
+      scores = map_dbl(features, function(feature) {
+        task$col_roles$feature = feature
+        learner$train(task)
+        model_summary = summary(learner$model)
+        pval = model_summary$coefficients[, "Pr(>|z|)"]
+        if (length(pval) > 1L) {
+          # factors with more than two levels yield one coefficient per dummy
+          # variable; summarize them with the joint Wald test so that exactly
+          # one score per feature is returned
+          pval = model_summary$waldtest[["pvalue"]]
+        }
+        -log(pval) # smaller p-values => larger scores
+      })
+
+      set_names(scores, features)
+    }
+  )
+)
+
+#' @include mlr_filters.R
+mlr_filters$add("univariatecox", FilterUnivariateCox)
diff --git a/tests/testthat/test_FilterUnivariateCox.R b/tests/testthat/test_FilterUnivariateCox.R
@@ -0,0 +1,24 @@
+skip_if_not_installed("mlr3proba")
+
+test_that("FilterUnivariateCox", {
+  task = tsk("rats")
+  filter = flt("univariatecox")
+  filter$calculate(task)
+
+  # generic filter contract; `-log(p)` scores can never be negative
+  expect_filter(filter, task = task)
+  expect_true(all(filter$scores >= 0))
+
+  # a 2-level factor is represented by a single coefficient, so its score must
+  # match the p-value of a CoxPH model fitted on that feature alone
+  feature = "sex"
+  expect_class(task$data(cols = feature)[[1]], "factor")
+
+  single_task = task$clone()
+  single_task$col_roles$feature = feature
+  coxph = lrn("surv.coxph")
+  coxph$train(single_task)
+  pval = summary(coxph$model)$coefficients[, "Pr(>|z|)"]
+
+  expect_equal(-log(pval), filter$scores[[feature]])
+})