Skip to content

Commit cb7634b

Browse files
committed
model segregation, score to be normalized, tests enabled, overlapping methods removed
1 parent 2e53e12 commit cb7634b

File tree

6 files changed

+192
-146
lines changed

6 files changed

+192
-146
lines changed

ads/opctl/operator/lowcode/anomaly/const.py

Lines changed: 41 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,16 @@
55
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66

77
import random
8+
89
from ads.common.extended_enum import ExtendedEnumMeta
910
from ads.opctl.operator.lowcode.common.const import DataColumns
10-
from merlion.models.anomaly import autoencoder, deep_point_anomaly_detector, isolation_forest, spectral_residual, windstats, windstats_monthly
11-
from merlion.models.anomaly.change_point import bocpd
12-
from merlion.models.forecast import prophet
1311

1412

1513
class SupportedModels(str, metaclass=ExtendedEnumMeta):
1614
"""Supported anomaly models."""
1715

1816
AutoTS = "autots"
1917
Auto = "auto"
20-
MerilonAD = "merlion_ad"
2118
IQR = "iqr"
2219
LOF = "lof"
2320
ZSCORE = "zscore"
@@ -26,6 +23,23 @@ class SupportedModels(str, metaclass=ExtendedEnumMeta):
2623
EE = "ee"
2724
ISOLATIONFOREST = "isolationforest"
2825

26+
# point anomaly
27+
DAGMM = "dagmm"
28+
DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector"
29+
LSTM_ED = "lstm_ed"
30+
SPECTRAL_RESIDUAL = "spectral_residual"
31+
VAE = "vae"
32+
33+
# forecast_based
34+
ARIMA = "arima"
35+
ETS = "ets"
36+
PROPHET = "prophet"
37+
SARIMA = "sarima"
38+
39+
# changepoint
40+
BOCPD = "bocpd"
41+
42+
2943
class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
3044
"""Supported non time-based anomaly detection models."""
3145

@@ -34,7 +48,7 @@ class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
3448
RandomCutForest = "randomcutforest"
3549
# TODO : Add DBScan
3650
# DBScan = "dbscan"
37-
51+
3852

3953
class TODSSubModels(str, metaclass=ExtendedEnumMeta):
4054
"""Supported TODS sub models."""
@@ -66,81 +80,51 @@ class TODSSubModels(str, metaclass=ExtendedEnumMeta):
6680
}
6781

6882

69-
class MerlionADSubmodels(str, metaclass=ExtendedEnumMeta):
83+
class MerlionADModels(str, metaclass=ExtendedEnumMeta):
7084
"""Supported Merlion AD sub models."""
7185

7286
# point anomaly
73-
AUTOENCODER = "autoencoder"
7487
DAGMM = "dagmm"
75-
DBL = "dbl"
7688
DEEP_POINT_ANOMALY_DETECTOR = "deep_point_anomaly_detector"
77-
ISOLATION_FOREST = "isolation_forest"
78-
LOF = "lof"
7989
LSTM_ED = "lstm_ed"
80-
# RANDOM_CUT_FOREST = "random_cut_forest"
8190
SPECTRAL_RESIDUAL = "spectral_residual"
82-
STAT_RESIDUAL = "stat_residual"
8391
VAE = "vae"
84-
WINDSTATS = "windstats"
85-
WINDSTATS_MONTHLY = "windstats_monthly"
86-
ZMS = "zms"
8792

8893
# forecast_based
8994
ARIMA = "arima"
9095
ETS = "ets"
91-
MSES = "mses"
9296
PROPHET = "prophet"
9397
SARIMA = "sarima"
9498

95-
#changepoint
99+
# changepoint
96100
BOCPD = "bocpd"
97101

98102

99103
MERLIONAD_IMPORT_MODEL_MAP = {
100-
MerlionADSubmodels.AUTOENCODER: ".autoendcoder",
101-
MerlionADSubmodels.DAGMM: ".dagmm",
102-
MerlionADSubmodels.DBL: ".dbl",
103-
MerlionADSubmodels.DEEP_POINT_ANOMALY_DETECTOR: ".deep_point_anomaly_detector",
104-
MerlionADSubmodels.ISOLATION_FOREST: ".isolation_forest",
105-
MerlionADSubmodels.LOF: ".lof",
106-
MerlionADSubmodels.LSTM_ED: ".lstm_ed",
107-
# MerlionADSubmodels.RANDOM_CUT_FOREST: ".random_cut_forest",
108-
MerlionADSubmodels.SPECTRAL_RESIDUAL: ".spectral_residual",
109-
MerlionADSubmodels.STAT_RESIDUAL: ".stat_residual",
110-
MerlionADSubmodels.VAE: ".vae",
111-
MerlionADSubmodels.WINDSTATS: ".windstats",
112-
MerlionADSubmodels.WINDSTATS_MONTHLY: ".windstats_monthly",
113-
MerlionADSubmodels.ZMS: ".zms",
114-
MerlionADSubmodels.ARIMA: ".forecast_based.arima",
115-
MerlionADSubmodels.ETS: ".forecast_based.ets",
116-
MerlionADSubmodels.MSES: ".forecast_based.mses",
117-
MerlionADSubmodels.PROPHET: ".forecast_based.prophet",
118-
MerlionADSubmodels.SARIMA: ".forecast_based.sarima",
119-
MerlionADSubmodels.BOCPD: ".change_point.bocpd",
104+
MerlionADModels.DAGMM: ".dagmm",
105+
MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: ".deep_point_anomaly_detector",
106+
MerlionADModels.LSTM_ED: ".lstm_ed",
107+
MerlionADModels.SPECTRAL_RESIDUAL: ".spectral_residual",
108+
MerlionADModels.VAE: ".vae",
109+
MerlionADModels.ARIMA: ".forecast_based.arima",
110+
MerlionADModels.ETS: ".forecast_based.ets",
111+
MerlionADModels.PROPHET: ".forecast_based.prophet",
112+
MerlionADModels.SARIMA: ".forecast_based.sarima",
113+
MerlionADModels.BOCPD: ".change_point.bocpd",
120114
}
121115

122116

123117
MERLIONAD_MODEL_MAP = {
124-
MerlionADSubmodels.AUTOENCODER: "AutoEncoder",
125-
MerlionADSubmodels.DAGMM: "DAGMM",
126-
MerlionADSubmodels.DBL: "DynamicBaseline",
127-
MerlionADSubmodels.DEEP_POINT_ANOMALY_DETECTOR: "DeepPointAnomalyDetector",
128-
MerlionADSubmodels.ISOLATION_FOREST: "IsolationForest",
129-
MerlionADSubmodels.LOF: "LOF",
130-
MerlionADSubmodels.LSTM_ED: "LSTMED",
131-
# MerlionADSubmodels.RANDOM_CUT_FOREST: "RandomCutForest",
132-
MerlionADSubmodels.SPECTRAL_RESIDUAL: "SpectralResidual",
133-
MerlionADSubmodels.STAT_RESIDUAL: "StatThreshold",
134-
MerlionADSubmodels.VAE: "VAE",
135-
MerlionADSubmodels.WINDSTATS: "WindStats",
136-
MerlionADSubmodels.WINDSTATS_MONTHLY: "MonthlyWindStats",
137-
MerlionADSubmodels.ZMS: "ZMS",
138-
MerlionADSubmodels.ARIMA: "ArimaDetector",
139-
MerlionADSubmodels.ETS: "ETSDetector",
140-
MerlionADSubmodels.MSES: "MSESDetector",
141-
MerlionADSubmodels.PROPHET: "ProphetDetector",
142-
MerlionADSubmodels.SARIMA: "SarimaDetector",
143-
MerlionADSubmodels.BOCPD: "BOCPD",
118+
MerlionADModels.DAGMM: "DAGMM",
119+
MerlionADModels.DEEP_POINT_ANOMALY_DETECTOR: "DeepPointAnomalyDetector",
120+
MerlionADModels.LSTM_ED: "LSTMED",
121+
MerlionADModels.SPECTRAL_RESIDUAL: "SpectralResidual",
122+
MerlionADModels.VAE: "VAE",
123+
MerlionADModels.ARIMA: "ArimaDetector",
124+
MerlionADModels.ETS: "ETSDetector",
125+
MerlionADModels.PROPHET: "ProphetDetector",
126+
MerlionADModels.SARIMA: "SarimaDetector",
127+
MerlionADModels.BOCPD: "BOCPD",
144128
}
145129

146130

@@ -182,6 +166,5 @@ class OutputColumns(str, metaclass=ExtendedEnumMeta):
182166
Series = DataColumns.Series
183167

184168

185-
MERLION_DEFAULT_MODEL = "prophet"
186169
TODS_DEFAULT_MODEL = "ocsvm"
187170
SUBSAMPLE_THRESHOLD = 1000

ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py

Lines changed: 62 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
from ads.common.decorator.runtime_dependency import runtime_dependency
1313
from ads.opctl.operator.lowcode.anomaly.const import (
14-
MERLION_DEFAULT_MODEL,
1514
MERLIONAD_IMPORT_MODEL_MAP,
1615
MERLIONAD_MODEL_MAP,
1716
OutputColumns,
@@ -31,31 +30,30 @@ class AnomalyMerlionOperatorModel(AnomalyOperatorBaseModel):
3130
"install the required packages."
3231
),
3332
)
34-
def _get_config_model(self, model_list):
33+
def _get_config_model(self, model_name):
3534
"""
3635
Returns a dictionary with model names as keys and a list of model config and model object as values.
3736
3837
Parameters
3938
----------
40-
model_list : list
41-
A list of model names.
39+
model_name : str
40+
model name from the Merlion model list.
4241
4342
Returns
4443
-------
4544
dict
4645
A dictionary with model names as keys and a list of model config and model object as values.
4746
"""
4847
model_config_map = {}
49-
for model_name in model_list:
50-
model_module = importlib.import_module(
51-
name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name),
52-
package="merlion.models.anomaly",
53-
)
54-
model_config = getattr(
55-
model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config"
56-
)
57-
model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name))
58-
model_config_map[model_name] = [model_config, model]
48+
model_module = importlib.import_module(
49+
name=MERLIONAD_IMPORT_MODEL_MAP.get(model_name),
50+
package="merlion.models.anomaly",
51+
)
52+
model_config = getattr(
53+
model_module, MERLIONAD_MODEL_MAP.get(model_name) + "Config"
54+
)
55+
model = getattr(model_module, MERLIONAD_MODEL_MAP.get(model_name))
56+
model_config_map[model_name] = [model_config, model]
5957
return model_config_map
6058

6159
def _build_model(self) -> AnomalyOutput:
@@ -75,66 +73,57 @@ def _build_model(self) -> AnomalyOutput:
7573
anomaly_output = AnomalyOutput(date_column="index")
7674
anomaly_threshold = model_kwargs.get("anomaly_threshold", 95)
7775
model_config_map = {}
78-
if model_kwargs.get("sub_model", None):
79-
model_config_map = self._get_config_model(model_kwargs.get("sub_model"))
80-
else:
81-
from merlion.models.anomaly.forecast_based.prophet import ( # noqa: I001
82-
ProphetDetector,
83-
ProphetDetectorConfig,
84-
)
85-
86-
model_config_map[MERLION_DEFAULT_MODEL] = [
87-
ProphetDetectorConfig,
88-
ProphetDetector,
89-
]
90-
91-
date_column = self.spec.datetime_column.name
92-
93-
anomaly_output = AnomalyOutput(date_column=date_column)
94-
# model_objects = defaultdict(list)
95-
for target, df in self.datasets.full_data_dict.items():
96-
data = df.set_index(date_column)
97-
data = TimeSeries.from_pd(data)
98-
for model_name, (model_config, model) in model_config_map.items():
99-
model_config = model_config(**self.spec.model_kwargs)
100-
if hasattr(model_config, "target_seq_index"):
101-
model_config.target_seq_index = df.columns.get_loc(
102-
self.spec.target_column
76+
model_config_map = self._get_config_model(self.spec.model)
77+
78+
date_column = self.spec.datetime_column.name
79+
80+
anomaly_output = AnomalyOutput(date_column=date_column)
81+
# model_objects = defaultdict(list)
82+
for target, df in self.datasets.full_data_dict.items():
83+
data = df.set_index(date_column)
84+
data = TimeSeries.from_pd(data)
85+
for model_name, (model_config, model) in model_config_map.items():
86+
model_config = model_config(**self.spec.model_kwargs)
87+
if hasattr(model_config, "target_seq_index"):
88+
model_config.target_seq_index = df.columns.get_loc(
89+
self.spec.target_column
90+
)
91+
model = model(model_config)
92+
93+
scores = model.train(train_data=data, anomaly_labels=None)
94+
scores = scores.to_pd().reset_index()
95+
scores["anom_score"] = (
96+
scores["anom_score"] - scores["anom_score"].min()
97+
) / (scores["anom_score"].max() - scores["anom_score"].min())
98+
99+
try:
100+
y_pred = model.get_anomaly_label(data)
101+
y_pred = (y_pred.to_pd().reset_index()["anom_score"] > 0).astype(
102+
int
103+
)
104+
except Exception as e:
105+
y_pred = (
106+
scores["anom_score"]
107+
> np.percentile(
108+
scores["anom_score"],
109+
anomaly_threshold,
103110
)
104-
model = model(model_config)
105-
106-
scores = model.train(train_data=data, anomaly_labels=None)
107-
108-
try:
109-
y_pred = model.get_anomaly_label(data)
110-
y_pred = (
111-
y_pred.to_pd().reset_index()["anom_score"] > 0
112-
).astype(int)
113-
except Exception as e:
114-
y_pred = (
115-
scores.to_pd().reset_index()["anom_score"]
116-
> np.percentile(
117-
scores.to_pd().reset_index()["anom_score"],
118-
anomaly_threshold,
119-
)
120-
).astype(int)
121-
122-
index_col = df.columns[0]
123-
124-
anomaly = pd.DataFrame(
125-
{index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
126-
).reset_index(drop=True)
127-
score = pd.DataFrame(
128-
{
129-
index_col: df[index_col],
130-
OutputColumns.SCORE_COL: scores.to_pd().reset_index()[
131-
"anom_score"
132-
],
133-
}
134-
).reset_index(drop=True)
135-
# model_objects[model_name].append(model)
136-
137-
anomaly_output.add_output(target, anomaly, score)
111+
).astype(int)
112+
113+
index_col = df.columns[0]
114+
115+
anomaly = pd.DataFrame(
116+
{index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
117+
).reset_index(drop=True)
118+
score = pd.DataFrame(
119+
{
120+
index_col: df[index_col],
121+
OutputColumns.SCORE_COL: scores["anom_score"],
122+
}
123+
).reset_index(drop=True)
124+
# model_objects[model_name].append(model)
125+
126+
anomaly_output.add_output(target, anomaly, score)
138127
return anomaly_output
139128

140129
def _generate_report(self):

ads/opctl/operator/lowcode/anomaly/model/factory.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,16 @@ class AnomalyOperatorModelFactory:
5151
SupportedModels.ROLLING_ZSCORE: AutoTSOperatorModel,
5252
SupportedModels.EE: AutoTSOperatorModel,
5353
SupportedModels.MAD: AutoTSOperatorModel,
54-
SupportedModels.MerilonAD: AnomalyMerlionOperatorModel,
54+
SupportedModels.DAGMM: AnomalyMerlionOperatorModel,
55+
SupportedModels.DEEP_POINT_ANOMALY_DETECTOR: AnomalyMerlionOperatorModel,
56+
SupportedModels.LSTM_ED: AnomalyMerlionOperatorModel,
57+
SupportedModels.SPECTRAL_RESIDUAL: AnomalyMerlionOperatorModel,
58+
SupportedModels.VAE: AnomalyMerlionOperatorModel,
59+
SupportedModels.ARIMA: AnomalyMerlionOperatorModel,
60+
SupportedModels.ETS: AnomalyMerlionOperatorModel,
61+
SupportedModels.PROPHET: AnomalyMerlionOperatorModel,
62+
SupportedModels.SARIMA: AnomalyMerlionOperatorModel,
63+
SupportedModels.BOCPD: AnomalyMerlionOperatorModel,
5564
}
5665

5766
_NonTime_MAP = {

ads/opctl/operator/lowcode/anomaly/schema.yaml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,16 @@ spec:
370370
- rolling_zscore
371371
- mad
372372
- ee
373-
- merlion_ad
373+
- dagmm
374+
- deep_point_anomaly_detector
375+
- lstm_ed
376+
- spectral_residual
377+
- vae
378+
- arima
379+
- ets
380+
- sarima
381+
- bocpd
382+
- prophet
374383
meta:
375384
description: "The model to be used for anomaly detection"
376385

ads/opctl/operator/lowcode/anomaly/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import os
77

8+
import numpy as np
89
import pandas as pd
910

1011
from ads.opctl import logger
@@ -27,6 +28,8 @@ def _build_metrics_df(y_true, y_pred, column_name):
2728
)
2829

2930
metrics = {}
31+
np.nan_to_num(y_true, copy=False)
32+
np.nan_to_num(y_pred, copy=False)
3033
metrics[SupportedMetrics.RECALL] = recall_score(y_true, y_pred)
3134
metrics[SupportedMetrics.PRECISION] = precision_score(y_true, y_pred)
3235
metrics[SupportedMetrics.ACCURACY] = accuracy_score(y_true, y_pred)

0 commit comments

Comments
 (0)