Skip to content

Commit a0efd38

Browse files
authored
Add sf merlion for time-based and forecast AD (#956)
2 parents c034c5f + 0151ab3 commit a0efd38

File tree

12 files changed

+361
-35
lines changed

12 files changed

+361
-35
lines changed

.github/workflows/run-forecast-unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
strategy:
2828
fail-fast: false
2929
matrix:
30-
python-version: ["3.8", "3.9", "3.10"]
30+
python-version: ["3.9", "3.10", "3.11"]
3131

3232
steps:
3333
- uses: actions/checkout@v4

.github/workflows/run-operators-unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
strategy:
2828
fail-fast: false
2929
matrix:
30-
python-version: ["3.8", "3.9", "3.10", "3.11"]
30+
python-version: ["3.9", "3.10", "3.11"]
3131

3232
steps:
3333
- uses: actions/checkout@v4

THIRD_PARTY_LICENSES.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,11 @@ rrcf
483483
* Source code: https://github.com/kLabUM/rrcf
484484
* Project home: https://github.com/kLabUM/rrcf
485485

486+
Merlion
487+
* Copyright 2021 Salesforce.com Inc
488+
* License: BSD-3 Clause License
489+
* Source code: https://github.com/salesforce/Merlion
490+
* Project Home: https://github.com/salesforce/Merlion
486491

487492
=============================== Licenses ===============================
488493
------------------------------------------------------------------------

ads/opctl/operator/lowcode/anomaly/const.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,23 @@ class SupportedModels(str, metaclass=ExtendedEnumMeta):
2121
EE = "ee"
2222
ISOLATIONFOREST = "isolationforest"
2323

24+
# point anomaly
25+
DAGMM = "dagmm"
26+
DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector"
27+
LSTM_ED = "lstm_ed"
28+
SPECTRAL_RESIDUAL = "spectral_residual"
29+
VAE = "vae"
30+
31+
# forecast_based
32+
ARIMA = "arima"
33+
ETS = "ets"
34+
PROPHET = "prophet"
35+
SARIMA = "sarima"
36+
37+
# changepoint
38+
BOCPD = "bocpd"
39+
40+
2441
class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
2542
"""Supported non time-based anomaly detection models."""
2643

@@ -29,7 +46,7 @@ class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
2946
RandomCutForest = "randomcutforest"
3047
# TODO : Add DBScan
3148
# DBScan = "dbscan"
32-
49+
3350

3451
class TODSSubModels(str, metaclass=ExtendedEnumMeta):
3552
"""Supported TODS sub models."""
@@ -61,6 +78,54 @@ class TODSSubModels(str, metaclass=ExtendedEnumMeta):
6178
}
6279

6380

81+
class MerlionADModels(str, metaclass=ExtendedEnumMeta):
82+
"""Supported Merlion AD sub models."""
83+
84+
# point anomaly
85+
DAGMM = "dagmm"
86+
DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector"
87+
LSTM_ED = "lstm_ed"
88+
SPECTRAL_RESIDUAL = "spectral_residual"
89+
VAE = "vae"
90+
91+
# forecast_based
92+
ARIMA = "arima"
93+
ETS = "ets"
94+
PROPHET = "prophet"
95+
SARIMA = "sarima"
96+
97+
# changepoint
98+
BOCPD = "bocpd"
99+
100+
101+
MERLIONAD_IMPORT_MODEL_MAP = {
102+
MerlionADModels.DAGMM: ".dagmm",
103+
MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: ".deep_point_anomaly_detector",
104+
MerlionADModels.LSTM_ED: ".lstm_ed",
105+
MerlionADModels.SPECTRAL_RESIDUAL: ".spectral_residual",
106+
MerlionADModels.VAE: ".vae",
107+
MerlionADModels.ARIMA: ".forecast_based.arima",
108+
MerlionADModels.ETS: ".forecast_based.ets",
109+
MerlionADModels.PROPHET: ".forecast_based.prophet",
110+
MerlionADModels.SARIMA: ".forecast_based.sarima",
111+
MerlionADModels.BOCPD: ".change_point.bocpd",
112+
}
113+
114+
115+
MERLIONAD_MODEL_MAP = {
116+
MerlionADModels.DAGMM: "DAGMM",
117+
MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: "DeepPointAnomalyDetector",
118+
MerlionADModels.LSTM_ED: "LSTMED",
119+
MerlionADModels.SPECTRAL_RESIDUAL: "SpectralResidual",
120+
MerlionADModels.VAE: "VAE",
121+
MerlionADModels.ARIMA: "ArimaDetector",
122+
MerlionADModels.ETS: "ETSDetector",
123+
MerlionADModels.PROPHET: "ProphetDetector",
124+
MerlionADModels.SARIMA: "SarimaDetector",
125+
MerlionADModels.BOCPD: "BOCPD",
126+
}
127+
128+
64129
class SupportedMetrics(str, metaclass=ExtendedEnumMeta):
65130
UNSUPERVISED_UNIFY95 = "unsupervised_unify95"
66131
UNSUPERVISED_UNIFY95_LOG_LOSS = "unsupervised_unify95_log_loss"
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
4+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
5+
6+
import importlib
7+
8+
import numpy as np
9+
import pandas as pd
10+
from merlion.post_process.threshold import AggregateAlarms
11+
from merlion.utils import TimeSeries
12+
13+
from ads.common.decorator.runtime_dependency import runtime_dependency
14+
from ads.opctl.operator.lowcode.anomaly.const import (
15+
MERLIONAD_IMPORT_MODEL_MAP,
16+
MERLIONAD_MODEL_MAP,
17+
OutputColumns,
18+
SupportedModels,
19+
)
20+
21+
from .anomaly_dataset import AnomalyOutput
22+
from .base_model import AnomalyOperatorBaseModel
23+
24+
25+
class AnomalyMerlionOperatorModel(AnomalyOperatorBaseModel):
26+
"""Class representing Merlion Anomaly Detection operator model."""
27+
28+
@runtime_dependency(
29+
module="merlion",
30+
err_msg=(
31+
"Please run `pip3 install salesforce-merlion[all]` to "
32+
"install the required packages."
33+
),
34+
)
35+
def _get_config_model(self, model_name):
36+
"""
37+
Returns a dictionary with model names as keys and a list of model config and model object as values.
38+
39+
Parameters
40+
----------
41+
model_name : str
42+
model name from the Merlion model list.
43+
44+
Returns
45+
-------
46+
dict
47+
A dictionary with model names as keys and a list of model config and model object as values.
48+
"""
49+
model_config_map = {}
50+
model_module = importlib.import_module(
51+
name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name),
52+
package="merlion.models.anomaly",
53+
)
54+
model_config = getattr(
55+
model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config"
56+
)
57+
model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name))
58+
model_config_map[model_name] = [model_config, model]
59+
return model_config_map
60+
61+
def _build_model(self) -> AnomalyOutput:
62+
"""
63+
Builds a Merlion anomaly detection model and trains it using the given data.
64+
65+
Parameters
66+
----------
67+
None
68+
69+
Returns
70+
-------
71+
AnomalyOutput
72+
An AnomalyOutput object containing the anomaly detection results.
73+
"""
74+
model_kwargs = self.spec.model_kwargs
75+
anomaly_output = AnomalyOutput(date_column="index")
76+
anomaly_threshold = model_kwargs.get("anomaly_threshold", 95)
77+
model_config_map = {}
78+
model_config_map = self._get_config_model(self.spec.model)
79+
80+
date_column = self.spec.datetime_column.name
81+
82+
anomaly_output = AnomalyOutput(date_column=date_column)
83+
# model_objects = defaultdict(list)
84+
for target, df in self.datasets.full_data_dict.items():
85+
data = df.set_index(date_column)
86+
data = TimeSeries.from_pd(data)
87+
for model_name, (model_config, model) in model_config_map.items():
88+
if self.spec.model == SupportedModels.BOCPD:
89+
model_config = model_config(**self.spec.model_kwargs)
90+
else:
91+
model_config = model_config(
92+
**{
93+
**self.spec.model_kwargs,
94+
"threshold": AggregateAlarms(
95+
alm_threshold=model_kwargs.get("alm_threshold")
96+
if model_kwargs.get("alm_threshold")
97+
else None
98+
),
99+
}
100+
)
101+
if hasattr(model_config, "target_seq_index"):
102+
model_config.target_seq_index = df.columns.get_loc(
103+
self.spec.target_column
104+
)
105+
model = model(model_config)
106+
107+
scores = model.train(train_data=data, anomaly_labels=None)
108+
scores = scores.to_pd().reset_index()
109+
scores["anom_score"] = (
110+
scores["anom_score"] - scores["anom_score"].min()
111+
) / (scores["anom_score"].max() - scores["anom_score"].min())
112+
113+
try:
114+
y_pred = model.get_anomaly_label(data)
115+
y_pred = (y_pred.to_pd().reset_index()["anom_score"] > 0).astype(
116+
int
117+
)
118+
except Exception as e:
119+
y_pred = (
120+
scores["anom_score"]
121+
> np.percentile(
122+
scores["anom_score"],
123+
anomaly_threshold,
124+
)
125+
).astype(int)
126+
127+
index_col = df.columns[0]
128+
129+
anomaly = pd.DataFrame(
130+
{index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
131+
).reset_index(drop=True)
132+
score = pd.DataFrame(
133+
{
134+
index_col: df[index_col],
135+
OutputColumns.SCORE_COL: scores["anom_score"],
136+
}
137+
).reset_index(drop=True)
138+
# model_objects[model_name].append(model)
139+
140+
anomaly_output.add_output(target, anomaly, score)
141+
return anomaly_output
142+
143+
def _generate_report(self):
144+
"""Genreates a report for the model."""
145+
import report_creator as rc
146+
147+
other_sections = [
148+
rc.Heading("Selected Models Overview", level=2),
149+
rc.Text(
150+
"The following tables provide information regarding the chosen model."
151+
),
152+
]
153+
154+
model_description = rc.Text(
155+
"The Merlion anomaly detection model is a full-stack automated machine learning system for anomaly detection."
156+
)
157+
158+
return (
159+
model_description,
160+
other_sections,
161+
)

ads/opctl/operator/lowcode/anomaly/model/autots.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,17 @@
55
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66

77
from ads.common.decorator.runtime_dependency import runtime_dependency
8+
from ads.opctl import logger
89
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
10+
11+
from ..const import SupportedModels
912
from .anomaly_dataset import AnomalyOutput
1013
from .base_model import AnomalyOperatorBaseModel
11-
from ..const import SupportedModels
12-
from ads.opctl import logger
1314

1415

1516
class AutoTSOperatorModel(AnomalyOperatorBaseModel):
1617
"""Class representing AutoTS Anomaly Detection operator model."""
18+
1719
model_mapping = {
1820
"isolationforest": "IsolationForest",
1921
"lof": "LOF",
@@ -22,30 +24,43 @@ class AutoTSOperatorModel(AnomalyOperatorBaseModel):
2224
"rolling_zscore": "rolling_zscore",
2325
"mad": "mad",
2426
"minmax": "minmax",
25-
"iqr": "IQR"
27+
"iqr": "IQR",
2628
}
2729

2830
@runtime_dependency(
2931
module="autots",
3032
err_msg=(
31-
"Please run `pip3 install autots` to "
32-
"install the required dependencies for AutoTS."
33+
"Please run `pip3 install autots` to "
34+
"install the required dependencies for AutoTS."
3335
),
3436
)
3537
def _build_model(self) -> AnomalyOutput:
3638
from autots.evaluator.anomaly_detector import AnomalyDetector
3739

38-
method = SupportedModels.ISOLATIONFOREST if self.spec.model == SupportedModels.AutoTS else self.spec.model
39-
model_params = {"method": self.model_mapping[method],
40-
"transform_dict": self.spec.model_kwargs.get("transform_dict", {}),
41-
"output": self.spec.model_kwargs.get("output", "univariate"), "method_params": {}}
40+
method = (
41+
SupportedModels.ISOLATIONFOREST
42+
if self.spec.model == SupportedModels.AutoTS
43+
else self.spec.model
44+
)
45+
model_params = {
46+
"method": self.model_mapping[method],
47+
"transform_dict": self.spec.model_kwargs.get("transform_dict", {}),
48+
"output": self.spec.model_kwargs.get("output", "univariate"),
49+
"method_params": {},
50+
}
4251
# Supported methods with contamination param
43-
if method in [SupportedModels.ISOLATIONFOREST, SupportedModels.LOF, SupportedModels.EE]:
44-
model_params["method_params"][
45-
"contamination"] = self.spec.contamination if self.spec.contamination else 0.01
46-
else:
47-
if self.spec.contamination:
48-
raise ValueError(f"The contamination parameter is not supported for the selected model \"{method}\"")
52+
if method in [
53+
SupportedModels.ISOLATIONFOREST,
54+
SupportedModels.LOF,
55+
SupportedModels.EE,
56+
]:
57+
model_params["method_params"]["contamination"] = (
58+
self.spec.contamination if self.spec.contamination else 0.01
59+
)
60+
elif self.spec.contamination:
61+
raise ValueError(
62+
f'The contamination parameter is not supported for the selected model "{method}"'
63+
)
4964
logger.info(f"model params: {model_params}")
5065

5166
model = AnomalyDetector(**model_params)

ads/opctl/operator/lowcode/anomaly/model/factory.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,16 @@
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
55

66
from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model
7+
8+
from ..const import NonTimeADSupportedModels, SupportedModels
9+
from ..operator_config import AnomalyOperatorConfig
710
from .anomaly_dataset import AnomalyDatasets
11+
from .anomaly_merlion import AnomalyMerlionOperatorModel
812
from .autots import AutoTSOperatorModel
913
from .base_model import AnomalyOperatorBaseModel
1014
from .isolationforest import IsolationForestOperatorModel
1115
from .oneclasssvm import OneClassSVMOperatorModel
1216
from .randomcutforest import RandomCutForestOperatorModel
13-
from ..const import NonTimeADSupportedModels, SupportedModels
14-
from ..operator_config import AnomalyOperatorConfig
1517

1618

1719
class UnSupportedModelError(Exception):
@@ -48,7 +50,17 @@ class AnomalyOperatorModelFactory:
4850
SupportedModels.ZSCORE: AutoTSOperatorModel,
4951
SupportedModels.ROLLING_ZSCORE: AutoTSOperatorModel,
5052
SupportedModels.EE: AutoTSOperatorModel,
51-
SupportedModels.MAD: AutoTSOperatorModel
53+
SupportedModels.MAD: AutoTSOperatorModel,
54+
SupportedModels.DAGMM: AnomalyMerlionOperatorModel,
55+
SupportedModels.DEEP_POINT_ANOMALY_DETECTOR: AnomalyMerlionOperatorModel,
56+
SupportedModels.LSTM_ED: AnomalyMerlionOperatorModel,
57+
SupportedModels.SPECTRAL_RESIDUAL: AnomalyMerlionOperatorModel,
58+
SupportedModels.VAE: AnomalyMerlionOperatorModel,
59+
SupportedModels.ARIMA: AnomalyMerlionOperatorModel,
60+
SupportedModels.ETS: AnomalyMerlionOperatorModel,
61+
SupportedModels.PROPHET: AnomalyMerlionOperatorModel,
62+
SupportedModels.SARIMA: AnomalyMerlionOperatorModel,
63+
SupportedModels.BOCPD: AnomalyMerlionOperatorModel,
5264
}
5365

5466
_NonTime_MAP = {

0 commit comments

Comments
 (0)