Skip to content

Commit 5e5e325

Browse files
authored
Merge branch 'main' into ODSC-59115/mlforecast_add_data
2 parents c905f30 + 67e66f7 commit 5e5e325

36 files changed

+104251
-75
lines changed

ads/opctl/operator/lowcode/anomaly/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ The operator will run in your local environment without requiring any additional
5858

5959
## 4. Running anomaly detection on the local container
6060

61-
To run the anomaly detection detection operator within a local container, follow these steps:
61+
To run the anomaly detection operator within a local container, follow these steps:
6262

6363
Use the command below to build the anomaly detection container.
6464

@@ -106,7 +106,7 @@ ads operator run -f ~/anomaly/anomaly.yaml --backend-config ~/anomaly/backend_op
106106

107107
## 5. Running anomaly detection in the Data Science job within container runtime
108108

109-
To execute the anomaly detection detection operator within a Data Science job using container runtime, please follow the steps outlined below:
109+
To execute the anomaly detection operator within a Data Science job using container runtime, please follow the steps outlined below:
110110

111111
You can use the following command to build the anomaly detection container. This step can be skipped if you have already done this for running the operator within a local container.
112112

@@ -155,7 +155,7 @@ ads opctl watch <OCID>
155155

156156
## 6. Running anomaly detection in the Data Science job within conda runtime
157157

158-
To execute the anomaly detection detection operator within a Data Science job using conda runtime, please follow the steps outlined below:
158+
To execute the anomaly detection operator within a Data Science job using conda runtime, please follow the steps outlined below:
159159

160160
You can use the following command to build the anomaly detection conda environment.
161161

ads/opctl/operator/lowcode/anomaly/__main__.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*--
32

43
# Copyright (c) 2024 Oracle and/or its affiliates.
54
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
@@ -15,7 +14,7 @@
1514
from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
1615
from ads.opctl.operator.common.utils import _parse_input_args
1716

18-
from .model.anomaly_dataset import AnomalyDatasets, AnomalyData
17+
from .model.anomaly_dataset import AnomalyDatasets
1918
from .operator_config import AnomalyOperatorConfig
2019

2120

@@ -34,7 +33,7 @@ def operate(operator_config: AnomalyOperatorConfig) -> None:
3433
f"Failed to forecast with error {e.args}. Trying again with model `autots`."
3534
)
3635
operator_config.spec.model = "autots"
37-
operator_config.spec.model_kwargs = dict()
36+
operator_config.spec.model_kwargs = {}
3837
datasets = AnomalyDatasets(operator_config.spec)
3938
try:
4039
AnomalyOperatorModelFactory.get_model(
@@ -44,12 +43,12 @@ def operate(operator_config: AnomalyOperatorConfig) -> None:
4443
logger.debug(
4544
f"Failed to backup forecast with error {ee.args}. Raising original error."
4645
)
47-
raise ee
46+
raise ee
4847
else:
4948
raise e
5049

5150

52-
def verify(spec: Dict, **kwargs: Dict) -> bool:
51+
def verify(spec: Dict) -> bool:
5352
"""Verifies the anomaly detection operator config."""
5453
operator = AnomalyOperatorConfig.from_dict(spec)
5554
msg_header = (
@@ -83,7 +82,7 @@ def main(raw_args: List[str]):
8382
yaml_string = yaml.safe_dump(json.loads(operator_spec_str))
8483
except json.JSONDecodeError:
8584
yaml_string = yaml.safe_dump(yaml.safe_load(operator_spec_str))
86-
except:
85+
except Exception:
8786
yaml_string = operator_spec_str
8887

8988
operator_config = AnomalyOperatorConfig.from_yaml(

ads/opctl/operator/lowcode/anomaly/const.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,14 @@ class SupportedModels(str, metaclass=ExtendedEnumMeta):
1616
Auto = "auto"
1717
# TODS = "tods"
1818

19+
class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
20+
"""Supported non-time-based anomaly detection models."""
21+
22+
OneClassSVM = "oneclasssvm"
23+
IsolationForest = "isolationforest"
24+
# TODO: Add DBScan
25+
# DBScan = "dbscan"
26+
1927

2028
class TODSSubModels(str, metaclass=ExtendedEnumMeta):
2129
"""Supported TODS sub models."""

ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,17 +84,21 @@ def get_inliers_by_cat(self, category: str, data: pd.DataFrame):
8484
scores = self.get_scores_by_cat(category)
8585
inlier_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 0]
8686
inliers = data.iloc[inlier_indices]
87-
if scores is not None and not scores.empty:
87+
if scores is not None and not scores.empty and self.date_column != "index":
8888
inliers = pd.merge(inliers, scores, on=self.date_column, how="inner")
89+
else:
90+
inliers = pd.merge(inliers, anomaly, left_index=True, right_index=True, how="inner")
8991
return inliers
9092

9193
def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
9294
anomaly = self.get_anomalies_by_cat(category)
9395
scores = self.get_scores_by_cat(category)
9496
outliers_indices = anomaly.index[anomaly[OutputColumns.ANOMALY_COL] == 1]
9597
outliers = data.iloc[outliers_indices]
96-
if scores is not None and not scores.empty:
98+
if scores is not None and not scores.empty and self.date_column != "index":
9799
outliers = pd.merge(outliers, scores, on=self.date_column, how="inner")
100+
else:
101+
outliers = pd.merge(outliers, anomaly, left_index=True, right_index=True, how="inner")
98102
return outliers
99103

100104
def get_inliers(self, datasets):

ads/opctl/operator/lowcode/anomaly/model/base_model.py

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,33 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*--
32

43
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
54
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
65

7-
import fsspec
8-
import numpy as np
96
import os
10-
import pandas as pd
117
import tempfile
128
import time
139
from abc import ABC, abstractmethod
14-
from sklearn import linear_model
1510
from typing import Tuple
1611

12+
import fsspec
13+
import numpy as np
14+
import pandas as pd
15+
from sklearn import linear_model
16+
1717
from ads.common.object_storage_details import ObjectStorageDetails
1818
from ads.opctl import logger
1919
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics
2020
from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
2121
from ads.opctl.operator.lowcode.common.utils import (
22-
human_time_friendly,
23-
enable_print,
2422
disable_print,
23+
enable_print,
24+
human_time_friendly,
2525
write_data,
2626
)
27-
from .anomaly_dataset import AnomalyDatasets, AnomalyOutput, TestData
28-
from ..const import SupportedModels
27+
28+
from ..const import NonTimeADSupportedModels, SupportedModels
2929
from ..operator_config import AnomalyOperatorConfig, AnomalyOperatorSpec
30+
from .anomaly_dataset import AnomalyDatasets, AnomalyOutput, TestData
3031

3132

3233
class AnomalyOperatorBaseModel(ABC):
@@ -53,15 +54,18 @@ def __init__(self, config: AnomalyOperatorConfig, datasets: AnomalyDatasets):
5354

5455
def generate_report(self):
5556
"""Generates the report."""
56-
import report_creator as rc
5757
import matplotlib.pyplot as plt
58+
import report_creator as rc
5859

5960
start_time = time.time()
6061
# fallback using sklearn oneclasssvm when the sub model _build_model fails
6162
try:
6263
anomaly_output = self._build_model()
6364
except Exception as e:
64-
anomaly_output = self._fallback_build_model()
65+
logger.warn(f"Found exception: {e}")
66+
if self.spec.datetime_column:
67+
anomaly_output = self._fallback_build_model()
68+
raise e
6569

6670
elapsed_time = time.time() - start_time
6771

@@ -79,7 +83,9 @@ def generate_report(self):
7983
for col, df in self.datasets.full_data_dict.items()
8084
]
8185
data_table = rc.Select(blocks=table_blocks)
82-
date_column = self.spec.datetime_column.name
86+
date_column = (
87+
self.spec.datetime_column.name if self.spec.datetime_column else "index"
88+
)
8389

8490
blocks = []
8591
for target, df in self.datasets.full_data_dict.items():
@@ -95,7 +101,7 @@ def generate_report(self):
95101
ax.grid()
96102
ax.plot(time_col, y, color="black")
97103
for i, index in enumerate(anomaly_col):
98-
if anomaly_col[i] == 1:
104+
if index == 1:
99105
ax.scatter(time_col[i], y[i], color="red", marker="o")
100106
plt.xlabel(date_column)
101107
plt.ylabel(col)
@@ -114,7 +120,7 @@ def generate_report(self):
114120
rc.Text(f"You selected the **`{self.spec.model}`** model."),
115121
rc.Text(
116122
"Based on your dataset, you could have also selected "
117-
f"any of the models: `{'`, `'.join(SupportedModels.keys())}`."
123+
f"any of the models: `{'`, `'.join(SupportedModels.keys() if self.spec.datetime_column else NonTimeADSupportedModels.keys())}`."
118124
),
119125
rc.Metric(
120126
heading="Analysis was completed in ",
@@ -170,7 +176,9 @@ def _test_data_evaluate_metrics(self, anomaly_output, test_data, elapsed_time):
170176

171177
for cat in anomaly_output.list_categories():
172178
output = anomaly_output.category_map[cat][0]
173-
date_col = self.spec.datetime_column.name
179+
date_col = (
180+
self.spec.datetime_column.name if self.spec.datetime_column else "index"
181+
)
174182

175183
test_data_i = test_data.get_data_for_series(cat)
176184

@@ -247,7 +255,7 @@ def _save_report(
247255
if ObjectStorageDetails.is_oci_path(unique_output_dir):
248256
storage_options = default_signer()
249257
else:
250-
storage_options = dict()
258+
storage_options = {}
251259

252260
# report-creator html report
253261
with tempfile.TemporaryDirectory() as temp_dir:
@@ -301,12 +309,11 @@ def _fallback_build_model(self):
301309
Fallback method for the sub model _build_model method.
302310
"""
303311
logger.warn(
304-
"The build_model method has failed for the model: {}. "
305-
"A fallback model will be built.".format(self.spec.model)
312+
f"The build_model method has failed for the model: {self.spec.model}. "
313+
"A fallback model will be built."
306314
)
307315

308316
date_column = self.spec.datetime_column.name
309-
dataset = self.datasets
310317

311318
anomaly_output = AnomalyOutput(date_column=date_column)
312319

@@ -320,7 +327,9 @@ def _fallback_build_model(self):
320327
y_pred = np.vectorize(self.outlier_map.get)(
321328
est.predict(df[self.spec.target_column].fillna(0).values.reshape(-1, 1))
322329
)
323-
scores = est.score_samples(df[self.spec.target_column].fillna(0).values.reshape(-1, 1))
330+
scores = est.score_samples(
331+
df[self.spec.target_column].fillna(0).values.reshape(-1, 1)
332+
)
324333

325334
anomaly = pd.DataFrame(
326335
{date_column: df[date_column], OutputColumns.ANOMALY_COL: y_pred}

ads/opctl/operator/lowcode/anomaly/model/factory.py

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,41 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*--
32

4-
# Copyright (c) 2023 Oracle and/or its affiliates.
3+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
54
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
65

7-
from ..const import SupportedModels
6+
from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
7+
8+
from ..const import NonTimeADSupportedModels, SupportedModels
89
from ..operator_config import AnomalyOperatorConfig
10+
from .anomaly_dataset import AnomalyDatasets
911
from .automlx import AutoMLXOperatorModel
1012
from .autots import AutoTSOperatorModel
11-
from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
1213

1314
# from .tods import TODSOperatorModel
1415
from .base_model import AnomalyOperatorBaseModel
15-
from .anomaly_dataset import AnomalyDatasets
16+
from .isolationforest import IsolationForestOperatorModel
17+
from .oneclasssvm import OneClassSVMOperatorModel
1618

1719

1820
class UnSupportedModelError(Exception):
19-
def __init__(self, model_type: str):
20-
super().__init__(
21-
f"Model: `{model_type}` "
22-
f"is not supported. Supported models: {SupportedModels.values}"
21+
"""Exception raised when the model is not supported.
22+
23+
Attributes:
24+
operator_config (AnomalyOperatorConfig): The operator configuration.
25+
model_type (str): The type of the unsupported model.
26+
"""
27+
28+
def __init__(self, operator_config: AnomalyOperatorConfig, model_type: str):
29+
supported_models = (
30+
SupportedModels.values
31+
if operator_config.spec.datetime_column
32+
else NonTimeADSupportedModels.values
2333
)
34+
message = (
35+
f"Model: `{model_type}` is not supported. "
36+
f"Supported models: {supported_models}"
37+
)
38+
super().__init__(message)
2439

2540

2641
class AnomalyOperatorModelFactory:
@@ -34,6 +49,13 @@ class AnomalyOperatorModelFactory:
3449
SupportedModels.AutoTS: AutoTSOperatorModel,
3550
}
3651

52+
_NonTime_MAP = {
53+
NonTimeADSupportedModels.OneClassSVM: OneClassSVMOperatorModel,
54+
NonTimeADSupportedModels.IsolationForest: IsolationForestOperatorModel,
55+
# TODO: Add DBScan model for non-time-based anomaly detection
56+
# NonTimeADSupportedModels.DBScan: DBScanOperatorModel,
57+
}
58+
3759
@classmethod
3860
def get_model(
3961
cls, operator_config: AnomalyOperatorConfig, datasets: AnomalyDatasets
@@ -61,7 +83,13 @@ def get_model(
6183
"""
6284
model_type = operator_config.spec.model
6385
if model_type == "auto":
64-
model_type = select_auto_model(datasets, operator_config)
65-
if model_type not in cls._MAP:
66-
raise UnSupportedModelError(model_type)
67-
return cls._MAP[model_type](config=operator_config, datasets=datasets)
86+
model_type = select_auto_model(operator_config)
87+
88+
model_map = (
89+
cls._MAP if operator_config.spec.datetime_column else cls._NonTime_MAP
90+
)
91+
92+
if model_type not in model_map:
93+
raise UnSupportedModelError(operator_config, model_type)
94+
95+
return model_map[model_type](config=operator_config, datasets=datasets)

0 commit comments

Comments
 (0)