Skip to content

Commit c00e529

Browse files
authored
AD | Added AutoTS/Sklearn Models as Operator Models with Contamination Parameter Support (#955)
2 parents 505985b + 5614af3 commit c00e529

File tree

9 files changed

+65
-96
lines changed

9 files changed

+65
-96
lines changed

ads/opctl/operator/lowcode/anomaly/const.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,15 @@
1111
class SupportedModels(str, metaclass=ExtendedEnumMeta):
1212
"""Supported anomaly models."""
1313

14-
AutoMLX = "automlx"
1514
AutoTS = "autots"
1615
Auto = "auto"
17-
# TODS = "tods"
16+
IQR = "iqr"
17+
LOF = "lof"
18+
ZSCORE = "zscore"
19+
ROLLING_ZSCORE = "rolling_zscore"
20+
MAD = "mad"
21+
EE = "ee"
22+
ISOLATIONFOREST = "isolationforest"
1823

1924
class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
2025
"""Supported non time-based anomaly detection models."""

ads/opctl/operator/lowcode/anomaly/model/autots.py

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4,80 +4,75 @@
44
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
55
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66

7-
import pandas as pd
8-
97
from ads.common.decorator.runtime_dependency import runtime_dependency
10-
11-
from .base_model import AnomalyOperatorBaseModel
12-
from .anomaly_dataset import AnomalyOutput
138
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
9+
from .anomaly_dataset import AnomalyOutput
10+
from .base_model import AnomalyOperatorBaseModel
11+
from ..const import SupportedModels
12+
from ads.opctl import logger
1413

1514

1615
class AutoTSOperatorModel(AnomalyOperatorBaseModel):
1716
"""Class representing AutoTS Anomaly Detection operator model."""
17+
model_mapping = {
18+
"isolationforest": "IsolationForest",
19+
"lof": "LOF",
20+
"ee": "EE",
21+
"zscore": "zscore",
22+
"rolling_zscore": "rolling_zscore",
23+
"mad": "mad",
24+
"minmax": "minmax",
25+
"iqr": "IQR"
26+
}
1827

1928
@runtime_dependency(
2029
module="autots",
2130
err_msg=(
22-
"Please run `pip3 install autots` to "
23-
"install the required dependencies for AutoTS."
31+
"Please run `pip3 install autots` to "
32+
"install the required dependencies for AutoTS."
2433
),
2534
)
2635
def _build_model(self) -> AnomalyOutput:
2736
from autots.evaluator.anomaly_detector import AnomalyDetector
2837

29-
method = self.spec.model_kwargs.get("method")
30-
transform_dict = self.spec.model_kwargs.get("transform_dict", {})
31-
32-
if method == "random" or method == "deep" or method == "fast":
33-
new_params = AnomalyDetector.get_new_params(method=method)
34-
transform_dict = new_params.pop("transform_dict")
35-
36-
for key, value in new_params.items():
37-
self.spec.model_kwargs[key] = value
38-
39-
if self.spec.model_kwargs.get("output") is None:
40-
self.spec.model_kwargs["output"] = "univariate"
41-
42-
if "transform_dict" not in self.spec.model_kwargs:
43-
self.spec.model_kwargs["transform_dict"] = transform_dict
44-
45-
if self.spec.contamination != 0.1: # TODO: remove hard-coding
46-
self.spec.model_kwargs.get("method_params", {})[
47-
"contamination"
48-
] = self.spec.contamination
49-
50-
model = AnomalyDetector(**self.spec.model_kwargs)
38+
method = SupportedModels.ISOLATIONFOREST if self.spec.model == SupportedModels.AutoTS else self.spec.model
39+
model_params = {"method": self.model_mapping[method],
40+
"transform_dict": self.spec.model_kwargs.get("transform_dict", {}),
41+
"output": self.spec.model_kwargs.get("output", "univariate"), "method_params": {}}
42+
# Supported methods with contamination param
43+
if method in [SupportedModels.ISOLATIONFOREST, SupportedModels.LOF, SupportedModels.EE]:
44+
model_params["method_params"][
45+
"contamination"] = self.spec.contamination if self.spec.contamination else 0.01
46+
else:
47+
if self.spec.contamination:
48+
raise ValueError(f"The contamination parameter is not supported for the selected model \"{method}\"")
49+
logger.info(f"model params: {model_params}")
50+
51+
model = AnomalyDetector(**model_params)
5152

5253
date_column = self.spec.datetime_column.name
5354

5455
anomaly_output = AnomalyOutput(date_column=date_column)
5556

5657
for target, df in self.datasets.full_data_dict.items():
5758
data = df.set_index(date_column)
58-
5959
(anomaly, score) = model.detect(data)
60-
6160
if len(anomaly.columns) == 1:
6261
score.rename(
6362
columns={score.columns.values[0]: OutputColumns.SCORE_COL},
6463
inplace=True,
6564
)
6665
score = 1 - score
6766
score = score.reset_index(drop=False)
68-
6967
col = anomaly.columns.values[0]
7068
anomaly[col] = anomaly[col].replace({1: 0, -1: 1})
7169
anomaly.rename(columns={col: OutputColumns.ANOMALY_COL}, inplace=True)
7270
anomaly = anomaly.reset_index(drop=False)
73-
7471
anomaly_output.add_output(target, anomaly, score)
75-
7672
else:
7773
raise NotImplementedError(
7874
"Multi-Output Anomaly Detection is not yet supported in autots"
7975
)
80-
8176
return anomaly_output
8277

8378
def _generate_report(self):

ads/opctl/operator/lowcode/anomaly/model/factory.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,14 @@
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
55

66
from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
7-
8-
from ..const import NonTimeADSupportedModels, SupportedModels
9-
from ..operator_config import AnomalyOperatorConfig
107
from .anomaly_dataset import AnomalyDatasets
11-
from .automlx import AutoMLXOperatorModel
128
from .autots import AutoTSOperatorModel
13-
14-
# from .tods import TODSOperatorModel
159
from .base_model import AnomalyOperatorBaseModel
1610
from .isolationforest import IsolationForestOperatorModel
1711
from .oneclasssvm import OneClassSVMOperatorModel
1812
from .randomcutforest import RandomCutForestOperatorModel
13+
from ..const import NonTimeADSupportedModels, SupportedModels
14+
from ..operator_config import AnomalyOperatorConfig
1915

2016

2117
class UnSupportedModelError(Exception):
@@ -45,9 +41,14 @@ class AnomalyOperatorModelFactory:
4541
"""
4642

4743
_MAP = {
48-
SupportedModels.AutoMLX: AutoMLXOperatorModel,
49-
# SupportedModels.TODS: TODSOperatorModel,
5044
SupportedModels.AutoTS: AutoTSOperatorModel,
45+
SupportedModels.IQR: AutoTSOperatorModel,
46+
SupportedModels.LOF: AutoTSOperatorModel,
47+
SupportedModels.ISOLATIONFOREST: AutoTSOperatorModel,
48+
SupportedModels.ZSCORE: AutoTSOperatorModel,
49+
SupportedModels.ROLLING_ZSCORE: AutoTSOperatorModel,
50+
SupportedModels.EE: AutoTSOperatorModel,
51+
SupportedModels.MAD: AutoTSOperatorModel
5152
}
5253

5354
_NonTime_MAP = {

ads/opctl/operator/lowcode/anomaly/schema.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -364,15 +364,21 @@ spec:
364364
- oneclasssvm
365365
- isolationforest
366366
- randomcutforest
367+
- iqr
368+
- lof
369+
- zscore
370+
- rolling_zscore
371+
- mad
372+
- ee
367373
meta:
368374
description: "The model to be used for anomaly detection"
369375

370376
contamination:
371377
required: false
372-
default: 0.1
378+
default: 0.01
373379
type: float
374380
meta:
375-
description: "Fraction of training dataset corresponding to anomalies (between 0.0 and 0.5)"
381+
description: "The proportion of outliers in the data set. The contamination should be in the range (0, 0.5]"
376382

377383
model_kwargs:
378384
type: dict

docs/source/user_guide/operators/anomaly_detection_operator/advanced_use_cases.rst

Lines changed: 1 addition & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ The Science of Anomaly Detection
88
Anomaly Detection comes in many forms. We will go through some of these and give guidance as to whether this Operator is going to be helpful for each use case.
99

1010
* Constructive v Destructive v Pre-Processing: This Operator focuses on the Constructive and Pre-Processing use cases. Destructive can work, but more specific parameters may be required.
11-
* Supervised v Semi-Supervised v Unsupervised: All 3 of these approaches are supported by AutoMLX. AutoTS supports only Unsupervised at this time.
12-
* Time Series. This Operator requires time-series data.
11+
* The operator currently supports only unsupervised learning and works with both time-series and non-time-series data.
1312

1413

1514
Data Parameterization
@@ -51,40 +50,3 @@ Data Parameterization
5150
datetime_column:
5251
name: ds
5352
target_column: y
54-
55-
56-
Model Parameterization
57-
----------------------
58-
59-
**Specify Model Type**
60-
61-
Sometimes users will know which models they want to use. When users know this in advance, they can specify using the ``model_kwargs`` dictionary. In the following example, we will instruct the model to *only* use the ``IsolationForestOD`` model.
62-
63-
.. code-block:: yaml
64-
65-
kind: operator
66-
type: anomaly
67-
version: v1
68-
spec:
69-
model: automlx
70-
model_kwargs:
71-
model_list:
72-
- IsolationForestOD
73-
search_space:
74-
IsolationForestOD:
75-
n_estimators:
76-
range': [10, 50]
77-
type': 'discrete'
78-
79-
80-
AutoTS offers the same extensibility:
81-
82-
.. code-block:: yaml
83-
84-
kind: operator
85-
type: anomaly
86-
version: v1
87-
spec:
88-
model: autots
89-
model_kwargs:
90-
method: IQR

docs/source/user_guide/operators/anomaly_detection_operator/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ If you have additional variables that you think might be related, then you shoul
2727

2828
**Auto Model Selection**
2929

30-
Operators users don't need to know anything about the underlying models in order to use them. By default we set ``model: auto``. However, some users want more control over the modeling parameters. These users can set the ``model`` parameter to either ``autots`` or ``automlx`` and then pass parameters directly into ``model_kwargs``. See :doc:`Advanced Examples <./advanced_use_cases>`
30+
Operator users don't need to know anything about the underlying models in order to use them. By default we set ``model: auto``. However, some users want more control over the modeling parameters. These users can set the ``model`` parameter to ``isolationforest``, ``lof``, ``ee``, ``zscore``, ``rolling_zscore``, ``mad``, ``minmax``, ``iqr`` or ``autots`` and then pass parameters directly into ``model_kwargs``. See :doc:`Advanced Examples <./advanced_use_cases>`
3131

3232
**Anomaly Detection Documentation**
3333

docs/source/user_guide/operators/anomaly_detection_operator/productionize.rst

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,7 @@ The yaml can also be maximally stated as follows:
156156
datetime_column:
157157
format: "%d/%m/%y"
158158
name: Date
159-
model: automlx
160-
model_kwargs:
161-
time_budget: 100
159+
model: ee
162160
preprocessing: true
163161
generate_metrics: true
164162
generate_report: true

docs/source/user_guide/operators/anomaly_detection_operator/yaml_schema.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@ Here is an example anomaly.yaml with every parameter specified:
1616
name: Date
1717
input_data:
1818
url: data.csv
19-
model: auto
19+
model: isolationforest
20+
contamination: 0.005
2021
target_column: target
22+
target_category_columns: ['series']
2123
2224
2325
* **Kind**: The yaml file always starts with ``kind: operator``. There are many other kinds of yaml files that can be run by ``ads opctl``, so we need to specify this is an operator.
@@ -39,7 +41,8 @@ Here is an example anomaly.yaml with every parameter specified:
3941
* **output_directory**: (optional) This dictionary contains the details for where to put the output artifacts. The directory need not exist, but must be accessible by the Operator during runtime.
4042
* **url**: Insert the uri for the dataset if it's on object storage or Data Lake using the URI pattern ``oci://<bucket>@<namespace>/subfolder/``.
4143
* **kwargs**: Insert any other args for pandas to load the data (``format``, ``options``, etc.) See full list in ``YAML Schema`` section.
42-
* **model**: (optional) The name of the model framework you want to use. Defaults to "auto". Other options are: ``autots``, and ``auto``.
44+
* **model**: (optional) The name of the model framework you want to use. Defaults to "auto". Other options are: iqr, lof, zscore, rolling_zscore, isolationforest, mad, ee, autots and auto.
45+
* **contamination**: (optional) The proportion of outliers in the data set. The contamination should be in the range (0, 0.5]. This parameter is supported only by specific methods, i.e. isolationforest, lof and ee. Setting it together with any other method (for example zscore or iqr) raises an error.
4346
* **model_kwargs**: (optional) This kwargs dict passes straight through to the model framework. If you want to take direct control of the modeling, this is the best way.
4447
* **test_data**: (optional) This dictionary contains the details for how to read the test data. Test data should contain every datetime value of the input_data, (optionally) all of the series from target_category_columns, and a column titled "anomaly" with either a 1 (non-anomalous) or 0 (anomalous).
4548
* **url**: Insert the uri for the dataset if it's on object storage or Data Lake using the URI pattern ``oci://<bucket>@<namespace>/path/to/data.csv``.

tests/operators/anomaly/test_anomaly_simple.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
55

6-
from ads.opctl.operator.lowcode.anomaly.const import NonTimeADSupportedModels
6+
from ads.opctl.operator.lowcode.anomaly.const import NonTimeADSupportedModels, SupportedModels
77
import yaml
88
import subprocess
99
import pandas as pd
@@ -16,8 +16,7 @@
1616
from datetime import datetime
1717
from ads.opctl.operator.cmd import run
1818

19-
20-
MODELS = ["autots"] # "automlx",
19+
MODELS = ["autots", "iqr", "lof", "zscore", "rolling_zscore", "mad", "ee", "isolationforest"]
2120

2221
# Mandatory YAML parameters
2322
TEMPLATE_YAML = {
@@ -218,7 +217,7 @@ def test_load_datasets(model, data_dict):
218217
yaml_i = deepcopy(TEMPLATE_YAML)
219218
yaml_i["spec"]["model"] = model
220219
yaml_i["spec"]["input_data"]["url"] = data_dict["url"]
221-
if model in NonTimeADSupportedModels.values():
220+
if model in set(NonTimeADSupportedModels.values()) - set(SupportedModels.values()):
222221
del yaml_i["spec"]["datetime_column"]
223222
else:
224223
yaml_i["spec"]["datetime_column"]["name"] = data_dict["dt_col"]

0 commit comments

Comments
 (0)