Skip to content

Commit d64fa63

Browse files
authored
feat: add boruta filter (#163)
* feat: add boruta filter * docs: update documentation * fix: donttest * fix: donttest * docs: citation * docs: ordering * fix: id * fix: non ascii * chore: michels improvements
1 parent 76dd164 commit d64fa63

30 files changed

+237
-1
lines changed

DESCRIPTION

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Imports:
3131
paradox,
3232
R6
3333
Suggests:
34+
Boruta,
3435
care,
3536
caret,
3637
carSurv,
@@ -55,6 +56,7 @@ Collate:
5556
'mlr_filters.R'
5657
'FilterAUC.R'
5758
'FilterAnova.R'
59+
'FilterBoruta.R'
5860
'FilterCMIM.R'
5961
'FilterCarScore.R'
6062
'FilterCarSurvScore.R'

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ S3method(as.data.table,Filter)
55
export(Filter)
66
export(FilterAUC)
77
export(FilterAnova)
8+
export(FilterBoruta)
89
export(FilterCMIM)
910
export(FilterCarScore)
1011
export(FilterCarSurvScore)

R/FilterBoruta.R

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#' @title Boruta Filter
2+
#'
3+
#' @name mlr_filters_boruta
4+
#'
5+
#' @description
6+
#' Filter using the Boruta algorithm for feature selection.
7+
#' If `keep = "tentative"`, confirmed and tentative features are returned.
8+
#' Note that there is no ordering in the selected features.
9+
#' Selected features get a score of 1, deselected features get a score of 0.
10+
#' The order of selected features is random.
11+
#' In combination with \CRANpkg{mlr3pipelines}, only the filter criterion `cutoff` makes sense, because the scores are binary (1 for selected, 0 for deselected features).
12+
#'
13+
#' @references
14+
#' `r format_bib("kursa_2010")`
15+
#'
16+
#' @family Filter
17+
#' @include Filter.R
18+
#' @template seealso_filter
19+
#' @export
20+
#' @examples
21+
#' \donttest{
22+
#' if (requireNamespace("Boruta")) {
23+
#' task = mlr3::tsk("sonar")
24+
#' filter = flt("boruta")
25+
#' filter$calculate(task)
26+
#' as.data.table(filter)
27+
#' }
28+
#' }
29+
30+
FilterBoruta = R6Class("FilterBoruta",
31+
inherit = Filter,
32+
33+
public = list(
34+
35+
#' @description
36+
#' Creates a new instance of this [R6][R6::R6Class] class.
37+
initialize = function() {
38+
39+
param_set = ps(
40+
pValue = p_dbl(default = 0.01),
41+
mcAdj = p_lgl(default = TRUE),
42+
maxRuns = p_int(lower = 1, default = 100),
43+
doTrace = p_int(lower = 0, upper = 4, default = 0),
44+
holdHistory = p_lgl(default = TRUE),
45+
getImp = p_uty(),
46+
keep = p_fct(levels = c("confirmed", "tentative"), default = "confirmed")
47+
)
48+
49+
param_set$set_values(keep = "confirmed")
50+
51+
super$initialize(
52+
id = "boruta",
53+
task_types = c("regr", "classif"),
54+
param_set = param_set,
55+
packages = "Boruta",
56+
feature_types = c("integer", "numeric"),
57+
label = "Boruta",
58+
man = "mlr3filters::mlr_filters_boruta"
59+
)
60+
}
61+
),
62+
63+
private = list(
64+
.calculate = function(task, nfeat) {
65+
pv = self$param_set$values
66+
data = task$data()
67+
target = task$target_names
68+
features = task$feature_names
69+
formula = formulate(target, features)
70+
keep = pv$keep
71+
pv$keep = NULL
72+
73+
res = invoke(Boruta::Boruta, formula = formula, data = data, .args = pv)
74+
75+
selected_features = Boruta::getSelectedAttributes(res, withTentative = (keep == "tentative"))
76+
77+
set_names(as.numeric(features %in% selected_features), features)
78+
}
79+
)
80+
)
81+
82+
83+
#' @include mlr_filters.R
84+
mlr_filters$add("boruta", FilterBoruta)

R/bibentries.R

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,14 @@ bibentries = c(
3434
author = "Andrea Bommert and Thomas Welchowski and Matthias Schmid and J\u00f6rg Rahnenf\u00fchrer",
3535
title = "Benchmark of filter methods for feature selection in high-dimensional gene expression survival data",
3636
journal = "Briefings in Bioinformatics"
37-
)
37+
),
38+
39+
kursa_2010 = bibentry("article",
40+
title = "Feature Selection with the Boruta Package",
41+
volume = "36",
42+
number = "11",
43+
journal = "Journal of Statistical Software",
44+
author = "Miron B. Kursa and Witold R. Rudnicki",
45+
year = "2010",
46+
pages = "1-13")
3847
)

man/Filter.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_anova.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_auc.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_boruta.Rd

Lines changed: 110 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_carscore.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_carsurvscore.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_cmim.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_correlation.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_disr.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_find_correlation.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_importance.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_information_gain.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_jmi.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_jmim.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_kruskal_test.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_mim.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_mrmr.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_njmim.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_performance.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mlr_filters_permutation.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)