rodrigo-arenas · rodrigo-arenas · Jul 23, 2025 · Jul 23, 2025 · Jul 23, 2025 · Jul 23, 2025
diff --git a/docs/index.rst b/docs/index.rst
@@ -68,6 +68,7 @@ This command will install all the extra requirements::
    tutorials/adapters
    tutorials/understand_cv
    tutorials/mlflow
+   tutorials/outliers
    tutorials/reproducibility
 
 .. toctree::

diff --git a/docs/release_notes.rst b/docs/release_notes.rst
@@ -3,6 +3,15 @@ Release Notes
 
 Some notes on new features in various releases
 
+What's new in 0.12.0
+--------------------
+
+^^^^^^^^^
+Features:
+^^^^^^^^^
+
+* Added compatibility for outlier detection algorithms
+
 What's new in 0.11.1
 --------------------
 

diff --git a/docs/tutorials/outliers.rst b/docs/tutorials/outliers.rst
@@ -0,0 +1,120 @@
+.. _outlier-detection:
+
+Outlier Detection Support
+=========================
+
+Overview
+--------
+
+`sklearn-genetic` now includes native support for tuning outlier detection models such as
+`IsolationForest`, `OneClassSVM`, and `LocalOutlierFactor` using `GASearchCV` and `GAFeatureSelectionCV`.
+These models are recognized automatically, and a default scoring function is applied when
+`scoring=None` is passed.
+
+This feature simplifies hyperparameter optimization for unsupervised anomaly detection problems,
+where `y` labels are not available.
+
+Default Scoring Logic
+----------------------
+
+When `scoring=None` and an estimator is recognized as an outlier detector, a default scorer is used.
+This scorer attempts the following, in order:
+
+1. If the estimator has `score_samples`, the mean of the scores is used.
+2. If `score_samples` is unavailable but `decision_function` exists, its mean value is used.
+3. As a fallback, the scorer calls `fit_predict` on `X` and returns the mean of `(predictions == 1)`, i.e. the fraction of samples labeled as inliers.
+
+This scoring system is designed to maximize flexibility and compatibility with a wide range of outlier models.
+
+.. code-block:: python
+
+    def default_outlier_scorer(estimator, X, y=None):
+        if hasattr(estimator, 'score_samples'):
+            return np.mean(estimator.score_samples(X))
+        elif hasattr(estimator, 'decision_function'):
+            return np.mean(estimator.decision_function(X))
+        else:
+            predictions = estimator.fit_predict(X)
+            return np.mean(predictions == 1)
+
+Examples
+--------
+
+Using `GASearchCV` with `IsolationForest`:
+
+.. code-block:: python
+
+    from sklearn.ensemble import IsolationForest
+    from sklearn_genetic import GASearchCV
+    from sklearn_genetic.space import Integer, Continuous
+    from sklearn.datasets import make_blobs
+    import numpy as np
+
+    # Create synthetic data with outliers
+    X_normal, _ = make_blobs(n_samples=200, centers=1, n_features=4, random_state=42)
+    X_outliers = np.random.uniform(low=-6, high=6, size=(20, 4))
+    X = np.vstack([X_normal, X_outliers])
+
+    estimator = IsolationForest(random_state=42)
+
+    param_grid = {
+        'contamination': Continuous(0.05, 0.3),
+        'n_estimators': Integer(50, 150)
+    }
+
+    search = GASearchCV(estimator=estimator,
+                        param_grid=param_grid,
+                        scoring=None,  # triggers default_outlier_scorer
+                        cv=3,
+                        generations=4,
+                        population_size=6,
+                        n_jobs=-1)
+
+    search.fit(X)
+
+Using `GAFeatureSelectionCV` with outlier detection (reusing `X` from the previous example):
+
+.. code-block:: python
+
+    from sklearn_genetic import GAFeatureSelectionCV
+    from sklearn.ensemble import IsolationForest
+
+    selector = GAFeatureSelectionCV(
+        estimator=IsolationForest(random_state=42),
+        scoring=None,  # default_outlier_scorer used
+        cv=3,
+        generations=4,
+        population_size=6,
+        n_jobs=-1
+    )
+
+    selector.fit(X)
+
+Custom Scoring
+--------------
+
+You may override the default logic by passing your own custom scoring function (this example reuses `np`, `X`, and `param_grid` from the first example):
+
+.. code-block:: python
+
+    def custom_score(estimator, X, y=None):
+        return np.std(estimator.score_samples(X))
+
+    search = GASearchCV(
+        estimator=IsolationForest(),
+        param_grid=param_grid,
+        scoring=custom_score,
+        cv=3,
+        generations=4,
+        population_size=6,
+        n_jobs=1
+    )
+
+    search.fit(X)
+
+Limitations
+-----------
+
+- Only estimators with `fit_predict`, `decision_function`, or `score_samples` are supported by default.
+- For models not recognized as outlier detectors, pass an explicit `scoring` argument; otherwise a `ValueError` is raised.
+
diff --git a/sklearn_genetic/_version.py b/sklearn_genetic/_version.py
@@ -1 +1 @@
-__version__ = "0.12.0dev"
+__version__ = "0.12.0"
diff --git a/sklearn_genetic/tests/test_feature_selection.py b/sklearn_genetic/tests/test_feature_selection.py
@@ -194,7 +194,7 @@ def test_negative_criteria():
     evolved_estimator = GAFeatureSelectionCV(
         clf,
         cv=3,
-        scoring="neg_max_error",
+        scoring="neg_mean_squared_error",
         population_size=5,
         generations=generations,
         tournament_size=3,

diff --git a/sklearn_genetic/tests/test_mlflow.py b/sklearn_genetic/tests/test_mlflow.py
@@ -21,6 +21,38 @@
 def mlflow_resources():
     uri = mlflow.get_tracking_uri()
     client = MlflowClient(uri)
+    return uri, client
+
+
+@pytest.fixture
+def mlflow_run(mlflow_resources):
+    _, client = mlflow_resources
+    exp_id = client.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
+    active_run = client.search_runs(exp_id, run_view_type=ViewType.ACTIVE_ONLY)
+    runs = [run.info.run_id for run in active_run]
+    return runs
+
+
+def test_mlflow_config(mlflow_resources):
+    """
+    Check MLflow config creation.
+    """
+    uri, _ = mlflow_resources
+    mlflow_config = MLflowConfig(
+        tracking_uri=uri,
+        experiment=EXPERIMENT_NAME,
+        run_name="Decision Tree",
+        save_models=True,
+        tags={"team": "sklearn-genetic-opt", "version": "0.5.0"},
+    )
+    assert isinstance(mlflow_config, MLflowConfig)
+
+
+def test_runs(mlflow_resources, mlflow_run):
+    """
+    Check if runs are captured and parameters are true.
+    """
+    uri, client = mlflow_resources
     mlflow_config = MLflowConfig(
         tracking_uri=uri,
         experiment=EXPERIMENT_NAME,
@@ -65,44 +97,15 @@ def mlflow_resources():
     )
 
     evolved_estimator.fit(X_train, y_train)
-
-    return uri, client, mlflow_config
-
-
-@pytest.fixture
-def mlflow_run(mlflow_resources):
-    _, client, _ = mlflow_resources
-    exp = client.get_experiment_by_name(EXPERIMENT_NAME)
-    if exp is None:
-        exp_id = client.create_experiment(EXPERIMENT_NAME)
-    else:
-        exp_id = exp.experiment_id
-    active_run = client.search_runs(exp_id, run_view_type=ViewType.ALL)
-    runs = [run.info.run_id for run in active_run]
-    return runs
-
-
-@pytest.mark.order(1)
-def test_mlflow_config(mlflow_resources):
-    """
-    Check MLflow config creation.
-    """
-    uri, _, mlflow_config = mlflow_resources
-    assert isinstance(mlflow_config, MLflowConfig)
-
-
-@pytest.mark.order(2)
-def test_runs(mlflow_resources, mlflow_run):
-    """
-    Check if runs are captured and parameters are true.
-    """
+    y_predict_ga = evolved_estimator.predict(X_test)
 
     runs = mlflow_run
-    assert len(runs) >= 1
+    assert len(runs) >= 1 and evolved_estimator.best_params_["min_weight_fraction_leaf"]
 
 
-@pytest.mark.order(3)
 def test_mlflow_artifacts(mlflow_resources, mlflow_run):
+    import os
+    import mlflow
 
     _, client = mlflow_resources
     run_id = mlflow_run[0]
@@ -128,11 +131,12 @@ def test_mlflow_artifacts(mlflow_resources, mlflow_run):
 
 
 
+
 def test_mlflow_params(mlflow_resources, mlflow_run):
     """
     Test parameters are all in the run and within range.
     """
-    _, client, _ = mlflow_resources
+    _, client = mlflow_resources
     run_id = mlflow_run[0]
     run = client.get_run(run_id)
     params = run.data.params
@@ -148,7 +152,6 @@ def test_mlflow_after_run(mlflow_resources, mlflow_run):
     Check that the run has logged expected artifacts, metrics, and hyperparameters to the MLflow server.
     """
     run_id = mlflow_run[0]
-
     _, client = mlflow_resources
 
     run = client.get_run(run_id)

diff --git a/..._genetic/tests/test_outliner_detection.py → ...n_genetic/tests/test_outlier_detection.py b/..._genetic/tests/test_outliner_detection.py → ...n_genetic/tests/test_outlier_detection.py
@@ -198,7 +198,7 @@ def mock_is_outlier_detector(est):
         sg_module.is_outlier_detector = original_is_outlier_detector
 
 
-def test_importerror_fallback():
+def test_import_error_fallback():
     """Test the ImportError fallback for is_outlier_detector"""
     import sklearn_genetic.genetic_search as sg_module