From 872472465ef142e2fbc40a2e50502e5b6b64b7e3 Mon Sep 17 00:00:00 2001 From: be-marc Date: Wed, 4 Jun 2025 19:41:26 +0200 Subject: [PATCH 01/10] ... --- R/ObjectiveFSelectAsync.R | 42 ++++++++++++++++++++++++++++++++++++--- R/ObjectiveFSelectBatch.R | 15 ++++++++++++-- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/R/ObjectiveFSelectAsync.R b/R/ObjectiveFSelectAsync.R index 2aca31fc..05983c11 100644 --- a/R/ObjectiveFSelectAsync.R +++ b/R/ObjectiveFSelectAsync.R @@ -16,6 +16,33 @@ #' @export ObjectiveFSelectAsync = R6Class("ObjectiveFSelectAsync", inherit = ObjectiveFSelect, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function( + task, + learner, + resampling, + measures, + check_values = TRUE, + store_benchmark_result = TRUE, + store_models = FALSE, + callbacks = NULL + ) { + super$initialize( + task = task, + learner = learner, + resampling = resampling, + measures = measures, + store_benchmark_result = store_benchmark_result, + store_models = store_models, + check_values = check_values, + callbacks = callbacks + ) + + private$.aggregator = if (all(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %nin% self$measures$properties) && self$codomain$length == 1) async_aggregator_fast else async_aggregator_default + } + ), private = list( .eval = function(xs, resampling) { lg$debug("Evaluating feature subset %s", as_short_string(xs)) @@ -37,8 +64,8 @@ ObjectiveFSelectAsync = R6Class("ObjectiveFSelectAsync", lg$debug("Aggregating performance") - # aggregate performance - private$.aggregated_performance = as.list(private$.resample_result$aggregate(self$measures)) + # aggregate performance using the appropriate aggregator + private$.aggregated_performance = as.list(private$.aggregator(private$.resample_result, self$measures)) lg$debug("Aggregated performance %s", as_short_string(private$.aggregated_performance)) @@ -61,6 +88,15 @@ ObjectiveFSelectAsync = 
R6Class("ObjectiveFSelectAsync", .xs = NULL, .resample_result = NULL, - .aggregated_performance = NULL + .aggregated_performance = NULL, + .aggregator = NULL ) ) + +async_aggregator_default = function(resample_result, measures) { + resample_result$aggregate(measures) +} + +async_aggregator_fast = function(resample_result, measures) { + mlr3::faggregate(resample_result, measures[[1]]) +} diff --git a/R/ObjectiveFSelectBatch.R b/R/ObjectiveFSelectBatch.R index 31ae0027..44fa8df9 100644 --- a/R/ObjectiveFSelectBatch.R +++ b/R/ObjectiveFSelectBatch.R @@ -51,6 +51,8 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", check_values = check_values, callbacks = callbacks ) + + private$.aggregator = if (all(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %nin% self$measures$properties) && self$codomain$length == 1) aggregator_fast else aggregator_default } ), @@ -80,7 +82,7 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", lg$debug("Aggregating performance") # aggregate performance scores - private$.aggregated_performance = private$.benchmark_result$aggregate(self$measures, conditions = TRUE)[, c(self$codomain$target_ids, "warnings", "errors"), with = FALSE] + private$.aggregated_performance = private$.aggregator(private$.benchmark_result, self$measures, self$codomain) lg$debug("Aggregated performance %s", as_short_string(private$.aggregated_performance)) @@ -106,6 +108,15 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", .design = NULL, .benchmark_result = NULL, .aggregated_performance = NULL, - .model_required = FALSE + .model_required = FALSE, + .aggregator = NULL ) ) + +aggregator_default = function(benchmark_result, measures, codomain) { + benchmark_result$aggregate(measures, conditions = TRUE)[, c(codomain$target_ids, "warnings", "errors"), with = FALSE] +} + +aggregator_fast = function(benchmark_result, measures, codomain) { + mlr3::faggregate(benchmark_result, measures[[1]]) +} \ No newline at end of file 
From c1396d79bc40b4749e95f2b9a472c3f609ce3982 Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 5 Jun 2025 10:38:58 +0200 Subject: [PATCH 02/10] feat: add fast aggregation --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 40ca6dcb..d8eaab9f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ Depends: mlr3 (>= 0.23.0), R (>= 3.1.0) Imports: - bbotk (>= 1.5.0.9000), + bbotk (>= 1.6.0), checkmate (>= 2.0.0), cli, data.table, @@ -47,6 +47,8 @@ Suggests: rush, rush (>= 0.2.0), testthat (>= 3.0.0) +Remotes: + mlr-org/mlr3@aggregate_fast Config/testthat/edition: 3 Config/testthat/parallel: false Encoding: UTF-8 From f9d60e367ebb31f4d8abac1f007539c3ca075c62 Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 5 Jun 2025 12:14:36 +0200 Subject: [PATCH 03/10] ... --- R/FSelectInstanceAsyncSingleCrit.R | 7 ++- R/FSelectInstanceBatchSingleCrit.R | 7 ++- R/ObjectiveFSelectAsync.R | 14 ++++-- R/ObjectiveFSelectBatch.R | 12 ++++- R/fselect.R | 10 ++-- R/sugar.R | 14 ++++-- man-roxygen/param_aggregate_fast.R | 5 ++ man/FSelectInstanceAsyncSingleCrit.Rd | 9 +++- man/FSelectInstanceBatchSingleCrit.Rd | 9 +++- man/ObjectiveFSelectAsync.Rd | 61 ++++++++++++++++++++++- man/ObjectiveFSelectBatch.Rd | 9 +++- man/fselect.Rd | 9 +++- man/fsi.Rd | 9 +++- man/fsi_async.Rd | 9 +++- man/mlr_fselectors_async_random_search.Rd | 5 -- 15 files changed, 161 insertions(+), 28 deletions(-) create mode 100644 man-roxygen/param_aggregate_fast.R diff --git a/R/FSelectInstanceAsyncSingleCrit.R b/R/FSelectInstanceAsyncSingleCrit.R index 0148eee6..1da6d1a4 100644 --- a/R/FSelectInstanceAsyncSingleCrit.R +++ b/R/FSelectInstanceAsyncSingleCrit.R @@ -19,6 +19,7 @@ #' @template param_callbacks #' @template param_rush #' @template param_ties_method +#' @template param_aggregate_fast #' #' @template param_xdt #' @template param_extra @@ -41,7 +42,8 @@ FSelectInstanceAsyncSingleCrit = R6Class("FSelectInstanceAsyncSingleCrit", check_values = 
FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL + rush = NULL, + aggregate_fast = FALSE ) { require_namespaces("rush") learner = assert_learner(as_learner(learner, clone = TRUE)) @@ -71,7 +73,8 @@ FSelectInstanceAsyncSingleCrit = R6Class("FSelectInstanceAsyncSingleCrit", store_benchmark_result = store_benchmark_result, store_models = store_models, check_values = check_values, - callbacks = callbacks) + callbacks = callbacks, + aggregate_fast = aggregate_fast) super$initialize( objective, diff --git a/R/FSelectInstanceBatchSingleCrit.R b/R/FSelectInstanceBatchSingleCrit.R index 563e2eaf..a69b627c 100644 --- a/R/FSelectInstanceBatchSingleCrit.R +++ b/R/FSelectInstanceBatchSingleCrit.R @@ -54,6 +54,7 @@ #' @template param_callbacks #' @template param_xdt #' @template param_ties_method +#' @template param_aggregate_fast #' #' @export #' @examples @@ -103,7 +104,8 @@ FSelectInstanceBatchSingleCrit = R6Class("FSelectInstanceBatchSingleCrit", store_models = FALSE, check_values = FALSE, callbacks = NULL, - ties_method = "least_features" + ties_method = "least_features", + aggregate_fast = FALSE ) { # initialized specialized fselect archive and objective archive = ArchiveBatchFSelect$new( @@ -121,7 +123,8 @@ FSelectInstanceBatchSingleCrit = R6Class("FSelectInstanceBatchSingleCrit", store_models = store_models, check_values = check_values, archive = archive, - callbacks = callbacks) + callbacks = callbacks, + aggregate_fast = aggregate_fast) super$initialize(objective, objective$domain, terminator, callbacks = callbacks) diff --git a/R/ObjectiveFSelectAsync.R b/R/ObjectiveFSelectAsync.R index 05983c11..816c0d5c 100644 --- a/R/ObjectiveFSelectAsync.R +++ b/R/ObjectiveFSelectAsync.R @@ -12,6 +12,7 @@ #' @template param_check_values #' @template param_store_benchmark_result #' @template param_callbacks +#' @template param_aggregate_fast #' #' @export ObjectiveFSelectAsync = R6Class("ObjectiveFSelectAsync", @@ -27,8 +28,11 @@ ObjectiveFSelectAsync = 
R6Class("ObjectiveFSelectAsync", check_values = TRUE, store_benchmark_result = TRUE, store_models = FALSE, - callbacks = NULL - ) { + callbacks = NULL, + aggregate_fast = FALSE + ) { + + assert_flag(aggregate_fast) super$initialize( task = task, learner = learner, @@ -40,7 +44,11 @@ ObjectiveFSelectAsync = R6Class("ObjectiveFSelectAsync", callbacks = callbacks ) - private$.aggregator = if (all(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %nin% self$measures$properties) && self$codomain$length == 1) async_aggregator_fast else async_aggregator_default + if (aggregate_fast && any(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %in% self$measures$properties)) { + stopf("Fast aggregation is only supported for measures that do not require task, learner, model or train set") + } + + private$.aggregator = if (aggregate_fast) aggregator_fast else aggregator_default } ), private = list( diff --git a/R/ObjectiveFSelectBatch.R b/R/ObjectiveFSelectBatch.R index 44fa8df9..5dd0351f 100644 --- a/R/ObjectiveFSelectBatch.R +++ b/R/ObjectiveFSelectBatch.R @@ -12,6 +12,7 @@ #' @template param_check_values #' @template param_store_benchmark_result #' @template param_callbacks +#' @template param_aggregate_fast #' #' @export ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", @@ -36,10 +37,12 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", store_benchmark_result = TRUE, store_models = FALSE, archive = NULL, - callbacks = NULL + callbacks = NULL, + aggregate_fast = FALSE ) { self$archive = assert_r6(archive, "ArchiveBatchFSelect", null.ok = TRUE) if (is.null(self$archive)) store_benchmark_result = store_models = FALSE + assert_flag(aggregate_fast) super$initialize( task = task, @@ -52,7 +55,11 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", callbacks = callbacks ) - private$.aggregator = if (all(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %nin% 
self$measures$properties) && self$codomain$length == 1) aggregator_fast else aggregator_default + if (aggregate_fast && any(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %in% self$measures$properties)) { + stopf("Fast aggregation is only supported for measures that do not require task, learner, model or train set") + } + + private$.aggregator = if (aggregate_fast) aggregator_fast else aggregator_default } ), @@ -82,6 +89,7 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", lg$debug("Aggregating performance") # aggregate performance scores + browser() private$.aggregated_performance = private$.aggregator(private$.benchmark_result, self$measures, self$codomain) lg$debug("Aggregated performance %s", as_short_string(private$.aggregated_performance)) diff --git a/R/fselect.R b/R/fselect.R index a492dc55..46e30b93 100644 --- a/R/fselect.R +++ b/R/fselect.R @@ -42,6 +42,7 @@ #' @template param_callbacks #' @template param_rush #' @template param_ties_method +#' @template param_aggregate_fast #' #' @export #' @examples @@ -82,7 +83,8 @@ fselect = function( check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL + rush = NULL, + aggregate_fast = FALSE ) { assert_fselector(fselector) terminator = terminator %??% terminator_selection(term_evals, term_time) @@ -100,7 +102,8 @@ fselect = function( check_values = check_values, callbacks = callbacks, rush = rush, - ties_method = ties_method + ties_method = ties_method, + aggregate_fast = aggregate_fast ) } else { FSelectInstanceAsyncMultiCrit$new( @@ -128,7 +131,8 @@ fselect = function( store_models = store_models, check_values = check_values, callbacks = callbacks, - ties_method = ties_method + ties_method = ties_method, + aggregate_fast = aggregate_fast ) } else { FSelectInstanceBatchMultiCrit$new( diff --git a/R/sugar.R b/R/sugar.R index f0a02180..b8d573f8 100644 --- a/R/sugar.R +++ b/R/sugar.R @@ -47,6 +47,7 @@ fss = function(.keys, ...) 
{ #' @template param_check_values #' @template param_callbacks #' @template param_ties_method +#' @template param_aggregate_fast #' #' @inheritSection FSelectInstanceBatchSingleCrit Resources #' @inheritSection FSelectInstanceBatchSingleCrit Default Measures @@ -63,7 +64,8 @@ fsi = function( store_models = FALSE, check_values = FALSE, callbacks = NULL, - ties_method = "least_features" + ties_method = "least_features", + aggregate_fast = FALSE ) { if (is.null(measures) || inherits(measures, "Measure")) { FSelectInstanceBatchSingleCrit$new( @@ -76,7 +78,8 @@ fsi = function( store_models = store_models, check_values = check_values, callbacks = callbacks, - ties_method = ties_method + ties_method = ties_method, + aggregate_fast = aggregate_fast ) } else { FSelectInstanceBatchMultiCrit$new( @@ -111,6 +114,7 @@ fsi = function( #' @template param_callbacks #' @template param_rush #' @template param_ties_method +#' @template param_aggregate_fast #' #' @inheritSection FSelectInstanceBatchSingleCrit Resources #' @inheritSection FSelectInstanceBatchSingleCrit Default Measures @@ -128,7 +132,8 @@ fsi_async = function( check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL + rush = NULL, + aggregate_fast = FALSE ) { if (is.null(measures) || inherits(measures, "Measure")) { FSelectInstanceAsyncSingleCrit$new( @@ -142,7 +147,8 @@ fsi_async = function( check_values = check_values, callbacks = callbacks, rush = rush, - ties_method = ties_method) + ties_method = ties_method, + aggregate_fast = aggregate_fast) } else { FSelectInstanceAsyncMultiCrit$new( task = task, diff --git a/man-roxygen/param_aggregate_fast.R b/man-roxygen/param_aggregate_fast.R new file mode 100644 index 00000000..91ba62fb --- /dev/null +++ b/man-roxygen/param_aggregate_fast.R @@ -0,0 +1,5 @@ +#' @param aggregate_fast (`logical(1)`)\cr +#' If `TRUE`, the performance values are aggregated in a fast way. 
+#' This is only supported for measures that do not require task, learner, model or train set. +#' The archive does not contain warnings and errors. +#' Default is `FALSE`. diff --git a/man/FSelectInstanceAsyncSingleCrit.Rd b/man/FSelectInstanceAsyncSingleCrit.Rd index 43c87c58..12bd0c74 100644 --- a/man/FSelectInstanceAsyncSingleCrit.Rd +++ b/man/FSelectInstanceAsyncSingleCrit.Rd @@ -81,7 +81,8 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL + rush = NULL, + aggregate_fast = FALSE )}\if{html}{\out{}} } @@ -128,6 +129,12 @@ Ignored if multiple measures are used.} \item{\code{rush}}{(\code{Rush})\cr If a rush instance is supplied, the optimization runs without batches.} + +\item{\code{aggregate_fast}}{(\code{logical(1)})\cr +If \code{TRUE}, the performance values are aggregated in a fast way. +This is only supported for measures that do not require task, learner, model or train set. +The archive does not contain warnings and errors. +Default is \code{FALSE}.} } \if{html}{\out{}} } diff --git a/man/FSelectInstanceBatchSingleCrit.Rd b/man/FSelectInstanceBatchSingleCrit.Rd index bb840beb..b61ea0e1 100644 --- a/man/FSelectInstanceBatchSingleCrit.Rd +++ b/man/FSelectInstanceBatchSingleCrit.Rd @@ -137,7 +137,8 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. store_models = FALSE, check_values = FALSE, callbacks = NULL, - ties_method = "least_features" + ties_method = "least_features", + aggregate_fast = FALSE )}\if{html}{\out{}} } @@ -181,6 +182,12 @@ The option \code{"least_features"} (default) selects the feature set with the le If there are multiple best feature sets with the same number of features, one is selected randomly. The \code{random} method returns a random feature set from the best feature sets. 
Ignored if multiple measures are used.} + +\item{\code{aggregate_fast}}{(\code{logical(1)})\cr +If \code{TRUE}, the performance values are aggregated in a fast way. +This is only supported for measures that do not require task, learner, model or train set. +The archive does not contain warnings and errors. +Default is \code{FALSE}.} } \if{html}{\out{}} } diff --git a/man/ObjectiveFSelectAsync.Rd b/man/ObjectiveFSelectAsync.Rd index b9aad69c..9d1f9d40 100644 --- a/man/ObjectiveFSelectAsync.Rd +++ b/man/ObjectiveFSelectAsync.Rd @@ -13,6 +13,7 @@ This class is usually constructed internally by the \link{FSelectInstanceAsyncSi \section{Methods}{ \subsection{Public methods}{ \itemize{ +\item \href{#method-ObjectiveFSelectAsync-new}{\code{ObjectiveFSelectAsync$new()}} \item \href{#method-ObjectiveFSelectAsync-clone}{\code{ObjectiveFSelectAsync$clone()}} } } @@ -25,11 +26,69 @@ This class is usually constructed internally by the \link{FSelectInstanceAsyncSi
  • bbotk::Objective$format()
  • bbotk::Objective$help()
  • bbotk::Objective$print()
  • -
  • mlr3fselect::ObjectiveFSelect$initialize()
  • }} \if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ObjectiveFSelectAsync-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{ObjectiveFSelectAsync$new( + task, + learner, + resampling, + measures, + check_values = TRUE, + store_benchmark_result = TRUE, + store_models = FALSE, + callbacks = NULL, + aggregate_fast = FALSE +)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{task}}{(\link[mlr3:Task]{mlr3::Task})\cr +Task to operate on.} + +\item{\code{learner}}{(\link[mlr3:Learner]{mlr3::Learner})\cr +Learner to optimize the feature subset for.} + +\item{\code{resampling}}{(\link[mlr3:Resampling]{mlr3::Resampling})\cr +Resampling that is used to evaluate the performance of the feature subsets. +Uninstantiated resamplings are instantiated during construction so that all feature subsets are evaluated on the same data splits. +Already instantiated resamplings are kept unchanged.} + +\item{\code{measures}}{(list of \link[mlr3:Measure]{mlr3::Measure})\cr +Measures to optimize. +If \code{NULL}, \CRANpkg{mlr3}'s default measure is used.} + +\item{\code{check_values}}{(\code{logical(1)})\cr +Check the parameters before the evaluation and the results for +validity?} + +\item{\code{store_benchmark_result}}{(\code{logical(1)})\cr +Store benchmark result in archive?} + +\item{\code{store_models}}{(\code{logical(1)}). +Store models in benchmark result?} + +\item{\code{callbacks}}{(list of \link{CallbackBatchFSelect})\cr +List of callbacks.} + +\item{\code{aggregate_fast}}{(\code{logical(1)})\cr +If \code{TRUE}, the performance values are aggregated in a fast way. +This is only supported for measures that do not require task, learner, model or train set. +The archive does not contain warnings and errors. +Default is \code{FALSE}.} +} +\if{html}{\out{
    }} +} +} +\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-ObjectiveFSelectAsync-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/man/ObjectiveFSelectBatch.Rd b/man/ObjectiveFSelectBatch.Rd index ba9e1e37..84116d62 100644 --- a/man/ObjectiveFSelectBatch.Rd +++ b/man/ObjectiveFSelectBatch.Rd @@ -51,7 +51,8 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. store_benchmark_result = TRUE, store_models = FALSE, archive = NULL, - callbacks = NULL + callbacks = NULL, + aggregate_fast = FALSE )}\if{html}{\out{}} } @@ -89,6 +90,12 @@ If \code{NULL} (default), benchmark result and models cannot be stored.} \item{\code{callbacks}}{(list of \link{CallbackBatchFSelect})\cr List of callbacks.} + +\item{\code{aggregate_fast}}{(\code{logical(1)})\cr +If \code{TRUE}, the performance values are aggregated in a fast way. +This is only supported for measures that do not require task, learner, model or train set. +The archive does not contain warnings and errors. +Default is \code{FALSE}.} } \if{html}{\out{}} } diff --git a/man/fselect.Rd b/man/fselect.Rd index 42365f78..c05d949d 100644 --- a/man/fselect.Rd +++ b/man/fselect.Rd @@ -18,7 +18,8 @@ fselect( check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL + rush = NULL, + aggregate_fast = FALSE ) } \arguments{ @@ -74,6 +75,12 @@ Ignored if multiple measures are used.} \item{rush}{(\code{Rush})\cr If a rush instance is supplied, the optimization runs without batches.} + +\item{aggregate_fast}{(\code{logical(1)})\cr +If \code{TRUE}, the performance values are aggregated in a fast way. +This is only supported for measures that do not require task, learner, model or train set. +The archive does not contain warnings and errors. 
+Default is \code{FALSE}.} } \value{ \link{FSelectInstanceBatchSingleCrit} | \link{FSelectInstanceBatchMultiCrit} diff --git a/man/fsi.Rd b/man/fsi.Rd index af21f23c..c983e853 100644 --- a/man/fsi.Rd +++ b/man/fsi.Rd @@ -14,7 +14,8 @@ fsi( store_models = FALSE, check_values = FALSE, callbacks = NULL, - ties_method = "least_features" + ties_method = "least_features", + aggregate_fast = FALSE ) } \arguments{ @@ -56,6 +57,12 @@ The option \code{"least_features"} (default) selects the feature set with the le If there are multiple best feature sets with the same number of features, one is selected randomly. The \code{random} method returns a random feature set from the best feature sets. Ignored if multiple measures are used.} + +\item{aggregate_fast}{(\code{logical(1)})\cr +If \code{TRUE}, the performance values are aggregated in a fast way. +This is only supported for measures that do not require task, learner, model or train set. +The archive does not contain warnings and errors. +Default is \code{FALSE}.} } \description{ Function to construct a \link{FSelectInstanceBatchSingleCrit} or \link{FSelectInstanceBatchMultiCrit}. diff --git a/man/fsi_async.Rd b/man/fsi_async.Rd index f53f5dc7..67fc0599 100644 --- a/man/fsi_async.Rd +++ b/man/fsi_async.Rd @@ -15,7 +15,8 @@ fsi_async( check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL + rush = NULL, + aggregate_fast = FALSE ) } \arguments{ @@ -60,6 +61,12 @@ Ignored if multiple measures are used.} \item{rush}{(\code{Rush})\cr If a rush instance is supplied, the optimization runs without batches.} + +\item{aggregate_fast}{(\code{logical(1)})\cr +If \code{TRUE}, the performance values are aggregated in a fast way. +This is only supported for measures that do not require task, learner, model or train set. +The archive does not contain warnings and errors. 
+Default is \code{FALSE}.} } \description{ Function to construct a \link{FSelectInstanceAsyncSingleCrit} or \link{FSelectInstanceAsyncMultiCrit}. diff --git a/man/mlr_fselectors_async_random_search.Rd b/man/mlr_fselectors_async_random_search.Rd index c67a7f7f..3fde6cfd 100644 --- a/man/mlr_fselectors_async_random_search.Rd +++ b/man/mlr_fselectors_async_random_search.Rd @@ -13,11 +13,6 @@ Bergstra J, Bengio Y (2012). \description{ Feature selection using Asynchronous Random Search Algorithm. } -\details{ -The feature sets are randomly drawn. -The sets are evaluated asynchronously. -The algorithm uses \link[bbotk:mlr_optimizers_async_random_search]{bbotk::OptimizerAsyncRandomSearch} for optimization. -} \section{Dictionary}{ This \link{FSelector} can be instantiated with the associated sugar function \code{\link[=fs]{fs()}}: From 9ee9b7f5c669b19f5f4626b92d566b8457e8c7b5 Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 5 Jun 2025 12:14:58 +0200 Subject: [PATCH 04/10] ... --- R/ObjectiveFSelectBatch.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/ObjectiveFSelectBatch.R b/R/ObjectiveFSelectBatch.R index 5dd0351f..5c86347b 100644 --- a/R/ObjectiveFSelectBatch.R +++ b/R/ObjectiveFSelectBatch.R @@ -89,7 +89,6 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", lg$debug("Aggregating performance") # aggregate performance scores - browser() private$.aggregated_performance = private$.aggregator(private$.benchmark_result, self$measures, self$codomain) lg$debug("Aggregated performance %s", as_short_string(private$.aggregated_performance)) From 94a61c3eab87df69f7ca454a87573b4fb4d941b6 Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 5 Jun 2025 12:22:35 +0200 Subject: [PATCH 05/10] ... 
--- .../test_FSelectInstanceAsyncSingleCrit.R | 25 +++++++++ .../testthat/test_FSelectInstanceSingleCrit.R | 54 +++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R b/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R index 7210d6c4..91c2b8ef 100644 --- a/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R +++ b/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R @@ -172,3 +172,28 @@ test_that("saving the models with FSelectInstanceAsyncSingleCrit works", { # fselector$optimize(instance) # }) + +test_that("fast aggregation works", { + skip_on_cran() + skip_if_not_installed("rush") + flush_redis() + + on.exit(mirai::daemons(0)) + mirai::daemons(2) + rush::rush_plan(n_workers = 2, worker_type = "remote") + + instance = fsi_async( + task = tsk("pima"), + learner = lrn("classif.rpart"), + resampling = rsmp("cv", folds = 3), + measures = msr("classif.ce"), + terminator = trm("evals", n_evals = 3), + aggregate_fast = TRUE + ) + + fselector = fs("async_random_search") + fselector$optimize(instance) + + expect_data_table(instance$archive$data, min.rows = 3L) + expect_rush_reset(instance$rush, type = "kill") +}) diff --git a/tests/testthat/test_FSelectInstanceSingleCrit.R b/tests/testthat/test_FSelectInstanceSingleCrit.R index 385c2ecd..1ff8d7db 100644 --- a/tests/testthat/test_FSelectInstanceSingleCrit.R +++ b/tests/testthat/test_FSelectInstanceSingleCrit.R @@ -132,3 +132,57 @@ test_that("objective contains no benchmark results", { expect_null(instance$objective$.__enclos_env__$private$.benchmark_result) }) + + +test_that("fast aggregation works", { + + instance = fsi( + task = tsk("pima"), + learner = lrn("classif.rpart"), + resampling = rsmp("cv", folds = 3), + measures = msr("classif.ce"), + terminator = trm("evals", n_evals = 3), + aggregate_fast = TRUE + ) + + fselector = fs("random_search") + fselector$optimize(instance) + + expect_names(names(instance$archive$data), disjunct.from = c("warnings", 
"errors")) + expect_data_table(instance$archive$data, min.rows = 3L) +}) + +test_that("fast aggregation and default produce the same results", { + with_seed(123, { + instance_fast = fsi( + task = tsk("pima"), + learner = lrn("classif.rpart"), + resampling = rsmp("cv", folds = 3), + measures = msr("classif.ce"), + terminator = trm("evals", n_evals = 3), + aggregate_fast = TRUE + ) + + fselector = fs("random_search") + fselector$optimize(instance_fast) + scores_fast = instance_fast$archive$data$classif.ce + }) + + with_seed(123, { + instance_default = fsi( + task = tsk("pima"), + learner = lrn("classif.rpart"), + resampling = rsmp("cv", folds = 3), + measures = msr("classif.ce"), + terminator = trm("evals", n_evals = 3), + aggregate_fast = FALSE + ) + + fselector = fs("random_search") + fselector$optimize(instance_default) + scores_default = instance_default$archive$data$classif.ce + }) + + expect_equal(scores_fast, scores_default) +}) + From 7deb731df04ac3ea30d64f7a601909c91993c2db Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 5 Jun 2025 12:29:13 +0200 Subject: [PATCH 06/10] ... 
--- R/ObjectiveFSelectAsync.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/ObjectiveFSelectAsync.R b/R/ObjectiveFSelectAsync.R index 816c0d5c..539ce9f5 100644 --- a/R/ObjectiveFSelectAsync.R +++ b/R/ObjectiveFSelectAsync.R @@ -31,7 +31,6 @@ ObjectiveFSelectAsync = R6Class("ObjectiveFSelectAsync", callbacks = NULL, aggregate_fast = FALSE ) { - assert_flag(aggregate_fast) super$initialize( task = task, @@ -48,7 +47,7 @@ ObjectiveFSelectAsync = R6Class("ObjectiveFSelectAsync", stopf("Fast aggregation is only supported for measures that do not require task, learner, model or train set") } - private$.aggregator = if (aggregate_fast) aggregator_fast else aggregator_default + private$.aggregator = if (aggregate_fast) async_aggregator_fast else async_aggregator_default } ), private = list( From 48f6b2bd5f82da51c466d4f2ed06ad1625fa8b67 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 6 Jun 2025 09:45:38 +0200 Subject: [PATCH 07/10] ... --- tests/testthat/test_fselect.R | 2 +- tests/testthat/test_fsi.R | 2 +- tests/testthat/test_fsi_async.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_fselect.R b/tests/testthat/test_fselect.R index 7cf0fad7..98f422ab 100644 --- a/tests/testthat/test_fselect.R +++ b/tests/testthat/test_fselect.R @@ -38,7 +38,7 @@ test_that("fselect interface is equal to FSelectInstanceBatchSingleCrit", { test_that("fselect interface is equal to FSelectInstanceBatchMultiCrit", { fselect_args = formalArgs(fselect) - fselect_args = fselect_args[fselect_args %nin% c("fselector", "ties_method")] + fselect_args = fselect_args[fselect_args %nin% c("fselector", "ties_method", "aggregate_fast")] instance_args = formalArgs(FSelectInstanceBatchMultiCrit$public_methods$initialize) instance_args = c(instance_args, "term_evals", "term_time", "rush") diff --git a/tests/testthat/test_fsi.R b/tests/testthat/test_fsi.R index e327dfe8..29ad5caf 100644 --- a/tests/testthat/test_fsi.R +++ 
b/tests/testthat/test_fsi.R @@ -43,7 +43,7 @@ test_that("fsi and FSelectInstanceBatchSingleCrit are equal", { test_that("fsi and FSelectInstanceBatchMultiCrit are equal", { fsi_args = formalArgs(fsi) - fsi_args = fsi_args[fsi_args != "ties_method"] + fsi_args = fsi_args[fsi_args %nin% c("ties_method", "aggregate_fast")] expect_equal(fsi_args, formalArgs(FSelectInstanceBatchMultiCrit$public_methods$initialize)) diff --git a/tests/testthat/test_fsi_async.R b/tests/testthat/test_fsi_async.R index fca2c7aa..ffc0f696 100644 --- a/tests/testthat/test_fsi_async.R +++ b/tests/testthat/test_fsi_async.R @@ -44,7 +44,7 @@ test_that("fsi_async interface is equal to FSelectInstanceAsyncMultiCrit", { flush_redis() fsi_args = formalArgs(fsi_async) - fsi_args = fsi_args[fsi_args != "ties_method"] + fsi_args = fsi_args[fsi_args %nin% c("ties_method", "aggregate_fast")] instance_args = formalArgs(FSelectInstanceAsyncMultiCrit$public_methods$initialize) expect_equal(fsi_args, instance_args) From bed33ebbd4d8e54497ba9a07d8a2b49a82c88a2e Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 6 Jun 2025 12:16:40 +0200 Subject: [PATCH 08/10] ... 
--- R/FSelectInstanceAsyncSingleCrit.R | 7 +- R/FSelectInstanceBatchSingleCrit.R | 7 +- R/ObjectiveFSelectAsync.R | 13 ++-- R/ObjectiveFSelectBatch.R | 46 ++++++++----- R/fselect.R | 10 +-- R/helper.R | 2 +- R/sugar.R | 14 ++-- inst/testthat/helper_misc.R | 2 +- .../test_FSelectInstanceAsyncSingleCrit.R | 62 +++++++++++++++++ .../testthat/test_FSelectInstanceSingleCrit.R | 69 ++++++++----------- 10 files changed, 138 insertions(+), 94 deletions(-) diff --git a/R/FSelectInstanceAsyncSingleCrit.R b/R/FSelectInstanceAsyncSingleCrit.R index 1da6d1a4..0148eee6 100644 --- a/R/FSelectInstanceAsyncSingleCrit.R +++ b/R/FSelectInstanceAsyncSingleCrit.R @@ -19,7 +19,6 @@ #' @template param_callbacks #' @template param_rush #' @template param_ties_method -#' @template param_aggregate_fast #' #' @template param_xdt #' @template param_extra @@ -42,8 +41,7 @@ FSelectInstanceAsyncSingleCrit = R6Class("FSelectInstanceAsyncSingleCrit", check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL, - aggregate_fast = FALSE + rush = NULL ) { require_namespaces("rush") learner = assert_learner(as_learner(learner, clone = TRUE)) @@ -73,8 +71,7 @@ FSelectInstanceAsyncSingleCrit = R6Class("FSelectInstanceAsyncSingleCrit", store_benchmark_result = store_benchmark_result, store_models = store_models, check_values = check_values, - callbacks = callbacks, - aggregate_fast = aggregate_fast) + callbacks = callbacks) super$initialize( objective, diff --git a/R/FSelectInstanceBatchSingleCrit.R b/R/FSelectInstanceBatchSingleCrit.R index a69b627c..563e2eaf 100644 --- a/R/FSelectInstanceBatchSingleCrit.R +++ b/R/FSelectInstanceBatchSingleCrit.R @@ -54,7 +54,6 @@ #' @template param_callbacks #' @template param_xdt #' @template param_ties_method -#' @template param_aggregate_fast #' #' @export #' @examples @@ -104,8 +103,7 @@ FSelectInstanceBatchSingleCrit = R6Class("FSelectInstanceBatchSingleCrit", store_models = FALSE, check_values = FALSE, callbacks = NULL, - ties_method = 
"least_features", - aggregate_fast = FALSE + ties_method = "least_features" ) { # initialized specialized fselect archive and objective archive = ArchiveBatchFSelect$new( @@ -123,8 +121,7 @@ FSelectInstanceBatchSingleCrit = R6Class("FSelectInstanceBatchSingleCrit", store_models = store_models, check_values = check_values, archive = archive, - callbacks = callbacks, - aggregate_fast = aggregate_fast) + callbacks = callbacks) super$initialize(objective, objective$domain, terminator, callbacks = callbacks) diff --git a/R/ObjectiveFSelectAsync.R b/R/ObjectiveFSelectAsync.R index 539ce9f5..a974dce3 100644 --- a/R/ObjectiveFSelectAsync.R +++ b/R/ObjectiveFSelectAsync.R @@ -12,7 +12,6 @@ #' @template param_check_values #' @template param_store_benchmark_result #' @template param_callbacks -#' @template param_aggregate_fast #' #' @export ObjectiveFSelectAsync = R6Class("ObjectiveFSelectAsync", @@ -28,10 +27,8 @@ ObjectiveFSelectAsync = R6Class("ObjectiveFSelectAsync", check_values = TRUE, store_benchmark_result = TRUE, store_models = FALSE, - callbacks = NULL, - aggregate_fast = FALSE + callbacks = NULL ) { - assert_flag(aggregate_fast) super$initialize( task = task, learner = learner, @@ -43,11 +40,11 @@ ObjectiveFSelectAsync = R6Class("ObjectiveFSelectAsync", callbacks = callbacks ) - if (aggregate_fast && any(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %in% self$measures$properties)) { - stopf("Fast aggregation is only supported for measures that do not require task, learner, model or train set") + if (self$codomain$length == 1 && all(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %nin% self$measures$properties)) { + private$.aggregator = async_aggregator_fast + } else { + private$.aggregator = async_aggregator_default } - - private$.aggregator = if (aggregate_fast) async_aggregator_fast else async_aggregator_default } ), private = list( diff --git a/R/ObjectiveFSelectBatch.R b/R/ObjectiveFSelectBatch.R 
index 5c86347b..b2a5acd6 100644 --- a/R/ObjectiveFSelectBatch.R +++ b/R/ObjectiveFSelectBatch.R @@ -12,7 +12,6 @@ #' @template param_check_values #' @template param_store_benchmark_result #' @template param_callbacks -#' @template param_aggregate_fast #' #' @export ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", @@ -37,12 +36,10 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", store_benchmark_result = TRUE, store_models = FALSE, archive = NULL, - callbacks = NULL, - aggregate_fast = FALSE + callbacks = NULL ) { self$archive = assert_r6(archive, "ArchiveBatchFSelect", null.ok = TRUE) if (is.null(self$archive)) store_benchmark_result = store_models = FALSE - assert_flag(aggregate_fast) super$initialize( task = task, @@ -54,12 +51,12 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", check_values = check_values, callbacks = callbacks ) - - if (aggregate_fast && any(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %in% self$measures$properties)) { - stopf("Fast aggregation is only supported for measures that do not require task, learner, model or train set") + measure_properties = unlist(map(self$measures, "properties")) + if (self$codomain$length == 1 && all(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %nin% measure_properties)) { + private$.aggregator = aggregator_fast + } else { + private$.aggregator = aggregator_default } - - private$.aggregator = if (aggregate_fast) aggregator_fast else aggregator_default } ), @@ -93,12 +90,6 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", lg$debug("Aggregated performance %s", as_short_string(private$.aggregated_performance)) - # add runtime to evaluations - time = map_dbl(private$.benchmark_result$resample_results$resample_result, function(rr) { - sum(map_dbl(get_private(rr)$.data$learner_states(get_private(rr)$.view), function(state) state$train_time + state$predict_time)) - }) - set(private$.aggregated_performance, j = 
"runtime_learners", value = time) - # store benchmark result in archive if (self$store_benchmark_result) { lg$debug("Storing resample result") @@ -121,9 +112,28 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", ) aggregator_default = function(benchmark_result, measures, codomain) { - benchmark_result$aggregate(measures, conditions = TRUE)[, c(codomain$target_ids, "warnings", "errors"), with = FALSE] + aggr = benchmark_result$aggregate(measures, conditions = TRUE)[, c(codomain$target_ids, "warnings", "errors"), with = FALSE] + + # add runtime + data = get_private(benchmark_result)$.data$data + tab = data$fact[data$uhashes, c("uhash", "learner_state"), with = FALSE] + learner_state = NULL + runtime = tab[, sum(map_dbl(learner_state, function(s) sum(s$train_time + s$predict_time))), by = uhash]$V1 + set(aggr, j = "runtime_learners", value = runtime) + aggr } aggregator_fast = function(benchmark_result, measures, codomain) { - mlr3::faggregate(benchmark_result, measures[[1]]) -} \ No newline at end of file + aggr = faggregate(benchmark_result, measures[[1]]) + + # add runtime and conditions + data = get_private(benchmark_result)$.data$data + tab = data$fact[data$uhashes, c("uhash", "learner_state"), with = FALSE] + + learner_state = NULL + aggr[tab[, list( + errors = sum(map_int(learner_state, function(s) sum(s$log$class == "error"))), + warnings = sum(map_int(learner_state, function(s) sum(s$log$class == "warning"))), + runtime_learners = sum(map_dbl(learner_state, function(s) sum(s$train_time + s$predict_time))) + ), by = uhash], on = "uhash"] +} diff --git a/R/fselect.R b/R/fselect.R index 46e30b93..a492dc55 100644 --- a/R/fselect.R +++ b/R/fselect.R @@ -42,7 +42,6 @@ #' @template param_callbacks #' @template param_rush #' @template param_ties_method -#' @template param_aggregate_fast #' #' @export #' @examples @@ -83,8 +82,7 @@ fselect = function( check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL, - aggregate_fast = 
FALSE + rush = NULL ) { assert_fselector(fselector) terminator = terminator %??% terminator_selection(term_evals, term_time) @@ -102,8 +100,7 @@ fselect = function( check_values = check_values, callbacks = callbacks, rush = rush, - ties_method = ties_method, - aggregate_fast = aggregate_fast + ties_method = ties_method ) } else { FSelectInstanceAsyncMultiCrit$new( @@ -131,8 +128,7 @@ fselect = function( store_models = store_models, check_values = check_values, callbacks = callbacks, - ties_method = ties_method, - aggregate_fast = aggregate_fast + ties_method = ties_method ) } else { FSelectInstanceBatchMultiCrit$new( diff --git a/R/helper.R b/R/helper.R index cd9b1117..e41ffaf1 100644 --- a/R/helper.R +++ b/R/helper.R @@ -19,7 +19,7 @@ measures_to_codomain = function(measures) { } extract_runtime = function(resample_result) { - runtimes = map_dbl(get_private(resample_result)$.data$learner_states(get_private(resample_result)$.view), function(state) { + runtimes = map_dbl(get_private(resample_result)$.data$learner_states(), function(state) { state$train_time + state$predict_time }) sum(runtimes) diff --git a/R/sugar.R b/R/sugar.R index b8d573f8..f0a02180 100644 --- a/R/sugar.R +++ b/R/sugar.R @@ -47,7 +47,6 @@ fss = function(.keys, ...) 
{ #' @template param_check_values #' @template param_callbacks #' @template param_ties_method -#' @template param_aggregate_fast #' #' @inheritSection FSelectInstanceBatchSingleCrit Resources #' @inheritSection FSelectInstanceBatchSingleCrit Default Measures @@ -64,8 +63,7 @@ fsi = function( store_models = FALSE, check_values = FALSE, callbacks = NULL, - ties_method = "least_features", - aggregate_fast = FALSE + ties_method = "least_features" ) { if (is.null(measures) || inherits(measures, "Measure")) { FSelectInstanceBatchSingleCrit$new( @@ -78,8 +76,7 @@ fsi = function( store_models = store_models, check_values = check_values, callbacks = callbacks, - ties_method = ties_method, - aggregate_fast = aggregate_fast + ties_method = ties_method ) } else { FSelectInstanceBatchMultiCrit$new( @@ -114,7 +111,6 @@ fsi = function( #' @template param_callbacks #' @template param_rush #' @template param_ties_method -#' @template param_aggregate_fast #' #' @inheritSection FSelectInstanceBatchSingleCrit Resources #' @inheritSection FSelectInstanceBatchSingleCrit Default Measures @@ -132,8 +128,7 @@ fsi_async = function( check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL, - aggregate_fast = FALSE + rush = NULL ) { if (is.null(measures) || inherits(measures, "Measure")) { FSelectInstanceAsyncSingleCrit$new( @@ -147,8 +142,7 @@ fsi_async = function( check_values = check_values, callbacks = callbacks, rush = rush, - ties_method = ties_method, - aggregate_fast = aggregate_fast) + ties_method = ties_method) } else { FSelectInstanceAsyncMultiCrit$new( task = task, diff --git a/inst/testthat/helper_misc.R b/inst/testthat/helper_misc.R index b07b2437..68d3ca5a 100644 --- a/inst/testthat/helper_misc.R +++ b/inst/testthat/helper_misc.R @@ -37,7 +37,7 @@ MeasureDummy = R6Class("MeasureDummy", inherit = MeasureRegr, ) } private$.score_design = score_design - super$initialize(id = "dummy", range = c(0, 4), minimize = minimize) + super$initialize(id = 
"dummy", range = c(0, 4), minimize = minimize, properties = c("requires_task", "requires_learner")) } ), private = list( diff --git a/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R b/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R index 91c2b8ef..1b7fd91b 100644 --- a/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R +++ b/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R @@ -197,3 +197,65 @@ test_that("fast aggregation works", { expect_data_table(instance$archive$data, min.rows = 3L) expect_rush_reset(instance$rush, type = "kill") }) + +test_that("fast aggregation and benchmark result produce the same scores", { + skip_on_cran() + skip_if_not_installed("rush") + flush_redis() + + on.exit(mirai::daemons(0)) + mirai::daemons(1) + rush::rush_plan(n_workers = 1, worker_type = "remote") + + instance = fsi_async( + task = tsk("pima"), + learner = lrn("classif.rpart"), + resampling = rsmp("cv", folds = 3), + measures = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + fselector = fs("async_random_search") + fselector$optimize(instance) + + expect_equal(get_private(instance$objective)$.aggregator, async_aggregator_fast) + + expect_equal(instance$archive$data$classif.ce, + instance$archive$benchmark_result$aggregate(msr("classif.ce"))$classif.ce) +}) + +test_that("fast aggregation and benchmark result produce the same conditions", { + skip_on_cran() + skip_if_not_installed("rush") + flush_redis() + + on.exit(mirai::daemons(0)) + mirai::daemons(1) + rush::rush_plan(n_workers = 1, worker_type = "remote") + + + learner = lrn("classif.debug", error_train = 0.5, warning_train = 0.5) + learner$encapsulate("callr", fallback = lrn("classif.debug")) + + instance = fsi_async( + task = tsk("pima"), + learner = learner, + resampling = rsmp("cv", folds = 3), + measures = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + fselector = fs("async_random_search") + fselector$optimize(instance) + + 
expect_equal(get_private(instance$objective)$.aggregator, async_aggregator_fast) + + expect_equal(instance$archive$data$classif.ce, + instance$archive$benchmark_result$aggregate(msr("classif.ce"))$classif.ce) + + expect_equal(instance$archive$data$errors, + instance$archive$benchmark_result$aggregate(msr("classif.ce"), conditions = TRUE)$errors) + + expect_equal(instance$archive$data$warnings, + instance$archive$benchmark_result$aggregate(msr("classif.ce"), conditions = TRUE)$warnings) +}) diff --git a/tests/testthat/test_FSelectInstanceSingleCrit.R b/tests/testthat/test_FSelectInstanceSingleCrit.R index 1ff8d7db..9cfc6748 100644 --- a/tests/testthat/test_FSelectInstanceSingleCrit.R +++ b/tests/testthat/test_FSelectInstanceSingleCrit.R @@ -133,56 +133,47 @@ test_that("objective contains no benchmark results", { expect_null(instance$objective$.__enclos_env__$private$.benchmark_result) }) - -test_that("fast aggregation works", { - +test_that("fast aggregation and benchmark result produce the same scores", { instance = fsi( task = tsk("pima"), learner = lrn("classif.rpart"), resampling = rsmp("cv", folds = 3), measures = msr("classif.ce"), - terminator = trm("evals", n_evals = 3), - aggregate_fast = TRUE + terminator = trm("evals", n_evals = 6) ) - fselector = fs("random_search") + fselector = fs("random_search", batch_size = 2) fselector$optimize(instance) - expect_names(names(instance$archive$data), disjunct.from = c("warnings", "errors")) - expect_data_table(instance$archive$data, min.rows = 3L) + expect_equal(get_private(instance$objective)$.aggregator, aggregator_fast) + + expect_equal(instance$archive$data$classif.ce, + instance$archive$benchmark_result$aggregate(msr("classif.ce"))$classif.ce) }) -test_that("fast aggregation and default produce the same results", { - with_seed(123, { - instance_fast = fsi( - task = tsk("pima"), - learner = lrn("classif.rpart"), - resampling = rsmp("cv", folds = 3), - measures = msr("classif.ce"), - terminator = trm("evals", 
n_evals = 3), - aggregate_fast = TRUE - ) - - fselector = fs("random_search") - fselector$optimize(instance_fast) - scores_fast = instance_fast$archive$data$classif.ce - }) +test_that("fast aggregation and benchmark result produce the same conditions", { + learner = lrn("classif.debug", error_train = 0.5, warning_train = 0.5) + learner$encapsulate("callr", fallback = lrn("classif.debug")) - with_seed(123, { - instance_default = fsi( - task = tsk("pima"), - learner = lrn("classif.rpart"), - resampling = rsmp("cv", folds = 3), - measures = msr("classif.ce"), - terminator = trm("evals", n_evals = 3), - aggregate_fast = FALSE - ) - - fselector = fs("random_search") - fselector$optimize(instance_default) - scores_default = instance_default$archive$data$classif.ce - }) + instance = fsi( + task = tsk("pima"), + learner = learner, + resampling = rsmp("cv", folds = 3), + measures = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) - expect_equal(scores_fast, scores_default) -}) + fselector = fs("random_search", batch_size = 2) + fselector$optimize(instance) + expect_equal(get_private(instance$objective)$.aggregator, aggregator_fast) + + expect_equal(instance$archive$data$classif.ce, + instance$archive$benchmark_result$aggregate(msr("classif.ce"))$classif.ce) + + expect_equal(instance$archive$data$errors, + instance$archive$benchmark_result$aggregate(msr("classif.ce"), conditions = TRUE)$errors) + + expect_equal(instance$archive$data$warnings, + instance$archive$benchmark_result$aggregate(msr("classif.ce"), conditions = TRUE)$warnings) +}) From b1f2b56af9a700864a92662d7e7e16ef40796e7d Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 6 Jun 2025 12:18:44 +0200 Subject: [PATCH 09/10] ... 
--- R/ObjectiveFSelectAsync.R | 4 ++-- .../test_FSelectInstanceAsyncSingleCrit.R | 24 ------------------- 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/R/ObjectiveFSelectAsync.R b/R/ObjectiveFSelectAsync.R index a974dce3..aea69cd8 100644 --- a/R/ObjectiveFSelectAsync.R +++ b/R/ObjectiveFSelectAsync.R @@ -39,8 +39,8 @@ ObjectiveFSelectAsync = R6Class("ObjectiveFSelectAsync", check_values = check_values, callbacks = callbacks ) - - if (self$codomain$length == 1 && all(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %nin% self$measures$properties)) { + measure_properties = unlist(map(self$measures, "properties")) + if (self$codomain$length == 1 && all(c("requires_task", "requires_learner", "requires_model", "requires_train_set") %nin% measure_properties)) { private$.aggregator = async_aggregator_fast } else { private$.aggregator = async_aggregator_default diff --git a/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R b/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R index 1b7fd91b..84bd2205 100644 --- a/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R +++ b/tests/testthat/test_FSelectInstanceAsyncSingleCrit.R @@ -173,30 +173,6 @@ test_that("saving the models with FSelectInstanceAsyncSingleCrit works", { # fselector$optimize(instance) # }) -test_that("fast aggregation works", { - skip_on_cran() - skip_if_not_installed("rush") - flush_redis() - - on.exit(mirai::daemons(0)) - mirai::daemons(2) - rush::rush_plan(n_workers = 2, worker_type = "remote") - - instance = fsi_async( - task = tsk("pima"), - learner = lrn("classif.rpart"), - resampling = rsmp("cv", folds = 3), - measures = msr("classif.ce"), - terminator = trm("evals", n_evals = 3), - aggregate_fast = TRUE - ) - - fselector = fs("async_random_search") - fselector$optimize(instance) - - expect_data_table(instance$archive$data, min.rows = 3L) - expect_rush_reset(instance$rush, type = "kill") -}) test_that("fast aggregation and benchmark result produce the 
same scores", { skip_on_cran() From dcd97db9bd1b65cff50f608ab3d2f8db356ef290 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 6 Jun 2025 12:31:09 +0200 Subject: [PATCH 10/10] ... --- man/FSelectInstanceAsyncSingleCrit.Rd | 9 +-------- man/FSelectInstanceBatchSingleCrit.Rd | 9 +-------- man/ObjectiveFSelectAsync.Rd | 9 +-------- man/ObjectiveFSelectBatch.Rd | 9 +-------- man/fselect.Rd | 9 +-------- man/fsi.Rd | 9 +-------- man/fsi_async.Rd | 9 +-------- 7 files changed, 7 insertions(+), 56 deletions(-) diff --git a/man/FSelectInstanceAsyncSingleCrit.Rd b/man/FSelectInstanceAsyncSingleCrit.Rd index 12bd0c74..43c87c58 100644 --- a/man/FSelectInstanceAsyncSingleCrit.Rd +++ b/man/FSelectInstanceAsyncSingleCrit.Rd @@ -81,8 +81,7 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL, - aggregate_fast = FALSE + rush = NULL )}\if{html}{\out{}} } @@ -129,12 +128,6 @@ Ignored if multiple measures are used.} \item{\code{rush}}{(\code{Rush})\cr If a rush instance is supplied, the optimization runs without batches.} - -\item{\code{aggregate_fast}}{(\code{logical(1)})\cr -If \code{TRUE}, the performance values are aggregated in a fast way. -This is only supported for measures that do not require task, learner, model or train set. -The archive does not contain warnings and errors. -Default is \code{FALSE}.} } \if{html}{\out{}} } diff --git a/man/FSelectInstanceBatchSingleCrit.Rd b/man/FSelectInstanceBatchSingleCrit.Rd index b61ea0e1..bb840beb 100644 --- a/man/FSelectInstanceBatchSingleCrit.Rd +++ b/man/FSelectInstanceBatchSingleCrit.Rd @@ -137,8 +137,7 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. 
store_models = FALSE, check_values = FALSE, callbacks = NULL, - ties_method = "least_features", - aggregate_fast = FALSE + ties_method = "least_features" )}\if{html}{\out{}} } @@ -182,12 +181,6 @@ The option \code{"least_features"} (default) selects the feature set with the le If there are multiple best feature sets with the same number of features, one is selected randomly. The \code{random} method returns a random feature set from the best feature sets. Ignored if multiple measures are used.} - -\item{\code{aggregate_fast}}{(\code{logical(1)})\cr -If \code{TRUE}, the performance values are aggregated in a fast way. -This is only supported for measures that do not require task, learner, model or train set. -The archive does not contain warnings and errors. -Default is \code{FALSE}.} } \if{html}{\out{}} } diff --git a/man/ObjectiveFSelectAsync.Rd b/man/ObjectiveFSelectAsync.Rd index 9d1f9d40..3bbaf11b 100644 --- a/man/ObjectiveFSelectAsync.Rd +++ b/man/ObjectiveFSelectAsync.Rd @@ -43,8 +43,7 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. check_values = TRUE, store_benchmark_result = TRUE, store_models = FALSE, - callbacks = NULL, - aggregate_fast = FALSE + callbacks = NULL )}\if{html}{\out{}} } @@ -78,12 +77,6 @@ Store models in benchmark result?} \item{\code{callbacks}}{(list of \link{CallbackBatchFSelect})\cr List of callbacks.} - -\item{\code{aggregate_fast}}{(\code{logical(1)})\cr -If \code{TRUE}, the performance values are aggregated in a fast way. -This is only supported for measures that do not require task, learner, model or train set. -The archive does not contain warnings and errors. -Default is \code{FALSE}.} } \if{html}{\out{}} } diff --git a/man/ObjectiveFSelectBatch.Rd b/man/ObjectiveFSelectBatch.Rd index 84116d62..ba9e1e37 100644 --- a/man/ObjectiveFSelectBatch.Rd +++ b/man/ObjectiveFSelectBatch.Rd @@ -51,8 +51,7 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. 
store_benchmark_result = TRUE, store_models = FALSE, archive = NULL, - callbacks = NULL, - aggregate_fast = FALSE + callbacks = NULL )}\if{html}{\out{}} } @@ -90,12 +89,6 @@ If \code{NULL} (default), benchmark result and models cannot be stored.} \item{\code{callbacks}}{(list of \link{CallbackBatchFSelect})\cr List of callbacks.} - -\item{\code{aggregate_fast}}{(\code{logical(1)})\cr -If \code{TRUE}, the performance values are aggregated in a fast way. -This is only supported for measures that do not require task, learner, model or train set. -The archive does not contain warnings and errors. -Default is \code{FALSE}.} } \if{html}{\out{}} } diff --git a/man/fselect.Rd b/man/fselect.Rd index c05d949d..42365f78 100644 --- a/man/fselect.Rd +++ b/man/fselect.Rd @@ -18,8 +18,7 @@ fselect( check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL, - aggregate_fast = FALSE + rush = NULL ) } \arguments{ @@ -75,12 +74,6 @@ Ignored if multiple measures are used.} \item{rush}{(\code{Rush})\cr If a rush instance is supplied, the optimization runs without batches.} - -\item{aggregate_fast}{(\code{logical(1)})\cr -If \code{TRUE}, the performance values are aggregated in a fast way. -This is only supported for measures that do not require task, learner, model or train set. -The archive does not contain warnings and errors. -Default is \code{FALSE}.} } \value{ \link{FSelectInstanceBatchSingleCrit} | \link{FSelectInstanceBatchMultiCrit} diff --git a/man/fsi.Rd b/man/fsi.Rd index c983e853..af21f23c 100644 --- a/man/fsi.Rd +++ b/man/fsi.Rd @@ -14,8 +14,7 @@ fsi( store_models = FALSE, check_values = FALSE, callbacks = NULL, - ties_method = "least_features", - aggregate_fast = FALSE + ties_method = "least_features" ) } \arguments{ @@ -57,12 +56,6 @@ The option \code{"least_features"} (default) selects the feature set with the le If there are multiple best feature sets with the same number of features, one is selected randomly. 
The \code{random} method returns a random feature set from the best feature sets. Ignored if multiple measures are used.} - -\item{aggregate_fast}{(\code{logical(1)})\cr -If \code{TRUE}, the performance values are aggregated in a fast way. -This is only supported for measures that do not require task, learner, model or train set. -The archive does not contain warnings and errors. -Default is \code{FALSE}.} } \description{ Function to construct a \link{FSelectInstanceBatchSingleCrit} or \link{FSelectInstanceBatchMultiCrit}. diff --git a/man/fsi_async.Rd b/man/fsi_async.Rd index 67fc0599..f53f5dc7 100644 --- a/man/fsi_async.Rd +++ b/man/fsi_async.Rd @@ -15,8 +15,7 @@ fsi_async( check_values = FALSE, callbacks = NULL, ties_method = "least_features", - rush = NULL, - aggregate_fast = FALSE + rush = NULL ) } \arguments{ @@ -61,12 +60,6 @@ Ignored if multiple measures are used.} \item{rush}{(\code{Rush})\cr If a rush instance is supplied, the optimization runs without batches.} - -\item{aggregate_fast}{(\code{logical(1)})\cr -If \code{TRUE}, the performance values are aggregated in a fast way. -This is only supported for measures that do not require task, learner, model or train set. -The archive does not contain warnings and errors. -Default is \code{FALSE}.} } \description{ Function to construct a \link{FSelectInstanceAsyncSingleCrit} or \link{FSelectInstanceAsyncMultiCrit}.