Skip to content

Commit c34981b

Browse files
authored
ODSC-51511 : Fallback method to train model for the AD models (#553)
2 parents 3b9cc48 + 59148c7 commit c34981b

File tree

1 file changed

+43
-1
lines changed

1 file changed

+43
-1
lines changed

ads/opctl/operator/lowcode/anomaly/model/base_model.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import fsspec
1414
import pandas as pd
1515
import numpy as np
16+
from sklearn import linear_model
1617

1718
from ads.opctl import logger
1819

@@ -61,7 +62,12 @@ def generate_report(self):
6162
import matplotlib.pyplot as plt
6263

6364
start_time = time.time()
64-
anomaly_output = self._build_model()
65+
# Fall back to scikit-learn's SGDOneClassSVM when the subclass's _build_model fails
66+
try:
67+
anomaly_output = self._build_model()
68+
except Exception as e:
69+
anomaly_output = self._fallback_build_model()
70+
6571
elapsed_time = time.time() - start_time
6672

6773
summary_metrics = None
@@ -298,6 +304,42 @@ def _save_report(
298304
f"generated and placed to the: {unique_output_dir}."
299305
)
300306

307+
def _fallback_build_model(self):
    """
    Build a fallback anomaly model when the sub model's _build_model fails.

    For every (target, dataframe) pair in ``self.datasets.full_data_dict`` a
    scikit-learn ``SGDOneClassSVM`` is fitted on the target column alone
    (as a single-feature matrix), and the same data is then scored.

    Side effects
    ------------
    Logs a warning naming ``self.spec.model`` and sets ``self.outlier_map``.

    Returns
    -------
    AnomalyOutput
        One entry per target, each holding an anomaly-flag frame and a
        score frame keyed by the configured datetime column.
    """
    # `warn` is a deprecated alias of `warning`; lazy %-formatting keeps the
    # emitted message identical to the previous `.format` call.
    # NOTE(review): assumes `logger` is a standard logging.Logger -- confirm.
    logger.warning(
        "The build_model method has failed for the model: %s. "
        "A fallback model will be built.",
        self.spec.model,
    )

    date_column = self.spec.datetime_column.name
    anomaly_output = AnomalyOutput(date_column=date_column)

    # Map the estimator's labels to the anomaly dataset convention:
    # predict() yields 1 for inliers and -1 for outliers, while the output
    # uses 1: outlier, 0: inlier.
    self.outlier_map = {1: 0, -1: 1}
    to_anomaly_flag = np.vectorize(self.outlier_map.get)

    for target, df in self.datasets.full_data_dict.items():
        # The estimator expects a 2-D (n_samples, n_features) array; build
        # the single-feature matrix once and reuse it for fit/predict/score.
        features = df[target].values.reshape(-1, 1)

        # Fixed random_state keeps the fallback reproducible across runs.
        est = linear_model.SGDOneClassSVM(random_state=42)
        est.fit(features)
        y_pred = to_anomaly_flag(est.predict(features))
        scores = est.score_samples(features)

        anomaly = pd.DataFrame(
            {
                date_column: df[date_column],
                OutputColumns.ANOMALY_COL: y_pred,
            }
        ).reset_index(drop=True)
        score = pd.DataFrame(
            {
                date_column: df[date_column],
                OutputColumns.SCORE_COL: scores,
            }
        ).reset_index(drop=True)
        anomaly_output.add_output(target, anomaly, score)

    return anomaly_output
342+
301343
@abstractmethod
302344
def _generate_report(self):
303345
"""

0 commit comments

Comments
 (0)