
Commit 129c0d8

autots bug
1 parent 72e4586 commit 129c0d8

File tree: 3 files changed, +24 -24 lines changed

  ads/opctl/operator/lowcode/anomaly/MLoperator
  ads/opctl/operator/lowcode/forecast/model/autots.py
  tests/operators/anomaly/test_anomaly_simple.py

3 files changed

+24
-24
lines changed

ads/opctl/operator/lowcode/anomaly/MLoperator

Lines changed: 1 addition & 0 deletions

@@ -7,6 +7,7 @@ keywords:
   - Anomaly Detection
 backends:
   - job
+  - operator.local
 description: |
   Anomaly Detection is the identification of rare items, events, or observations in data that
   differ significantly from the expectation. This can be used for several scenarios like asset

ads/opctl/operator/lowcode/forecast/model/autots.py

Lines changed: 18 additions & 14 deletions

@@ -11,11 +11,7 @@
 import yaml

 from ads.opctl import logger
-from ads.opctl.operator.lowcode.common.utils import (
-    disable_print,
-    enable_print,
-    seconds_to_datetime,
-)
+from ads.opctl.operator.lowcode.common.utils import seconds_to_datetime
 from .base_model import ForecastOperatorBaseModel
 from ..operator_config import ForecastOperatorConfig
 from ads.common.decorator.runtime_dependency import runtime_dependency

@@ -124,21 +120,29 @@ def _build_model(self) -> pd.DataFrame:
         full_data_indexed = self.datasets.get_data_multi_indexed()

         dates = full_data_indexed.index.get_level_values(0).unique().tolist()
-        horizon_idx = dates[-self.spec.horizon :]
         train_idx = dates[: -self.spec.horizon]

-        regr_fcst = full_data_indexed[
-            full_data_indexed.index.get_level_values(0).isin(horizon_idx)
-        ].reset_index()
-        regr_train = full_data_indexed[
+        df_train = full_data_indexed[
             full_data_indexed.index.get_level_values(0).isin(train_idx)
-        ]
-        self.future_regressor_train = regr_train.copy()
+        ][[self.original_target_column]].reset_index()
+
+        # Future regressors need to be in wide format - (num_unique_dates x (num_unique_series x num_unique_cols))
+        additional_regressors = list(
+            set(full_data_indexed.columns) - {self.original_target_column}
+        )
+        future_regressor = full_data_indexed.reset_index().pivot(
+            index=self.spec.datetime_column.name,
+            columns=ForecastOutputColumns.SERIES,
+            values=additional_regressors,
+        )
+
+        future_reg = future_regressor[: -self.spec.horizon]
+        regr_fcst = future_regressor[-self.spec.horizon :]

         if self.loaded_models is None:
             model = model.fit(
-                full_data_indexed.reset_index(),
-                future_regressor=regr_train.reset_index(),
+                df_train,
+                future_regressor=future_reg,
                 date_col=self.spec.datetime_column.name,
                 value_col=self.original_target_column,
                 id_col=ForecastOutputColumns.SERIES,
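The inline comment added above captures the constraint this change enforces: the training frame passed to fit() stays in long format (date, series id, target), while the future regressors are pivoted into wide format with one row per date and one column per (regressor, series) pair. A minimal sketch of that pivot on made-up data; the column names "ds", "series", "y", and "temp" are hypothetical and not taken from the operator:

import pandas as pd

# Hypothetical long-format input: two series ("A", "B") over two dates,
# a target column "y", and one additional regressor "temp".
long_df = pd.DataFrame(
    {
        "ds": pd.to_datetime(["2024-01-01", "2024-01-01", "2024-01-02", "2024-01-02"]),
        "series": ["A", "B", "A", "B"],
        "y": [10.0, 20.0, 11.0, 21.0],
        "temp": [5.0, 7.0, 6.0, 8.0],
    }
)

# Regressors are every column that is not the date, the series id, or the target.
additional_regressors = [c for c in long_df.columns if c not in ("ds", "series", "y")]

# Wide format: index = unique dates, columns = (regressor, series) pairs.
wide = long_df.pivot(index="ds", columns="series", values=additional_regressors)
print(wide.shape)  # (2, 2): 2 dates x (1 regressor * 2 series)

# Positional split mirroring the horizon slicing in the diff above.
horizon = 1
future_reg = wide[:-horizon]   # regressor values over the training dates
regr_fcst = wide[-horizon:]    # regressor values over the forecast horizon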

tests/operators/anomaly/test_anomaly_simple.py

Lines changed: 5 additions & 10 deletions

@@ -12,6 +12,7 @@
 import tempfile
 import os
 import numpy as np
+from datetime import datetime
 from ads.opctl.operator.cmd import run


@@ -79,7 +80,7 @@ def test_artificial_big(model):
     with tempfile.TemporaryDirectory() as tmpdirname:
         anomaly_yaml_filename = f"{tmpdirname}/anomaly.yaml"
         input_data = f"{tmpdirname}/data.csv"
-        output_dirname = f"{tmpdirname}/results"
+        output_dirname = f"{tmpdirname}/results_{datetime.now()}"

         d.to_csv(input_data, index=False)

@@ -121,7 +122,7 @@ def test_artificial_small(model):
     with tempfile.TemporaryDirectory() as tmpdirname:
         anomaly_yaml_filename = f"{tmpdirname}/anomaly.yaml"
         input_data = f"{tmpdirname}/data.csv"
-        output_dirname = f"{tmpdirname}/results"
+        output_dirname = f"{tmpdirname}/results_{datetime.now()}"

         d.to_csv(input_data, index=False)

@@ -170,7 +171,7 @@ def test_validation(model):
         input_data = f"{tmpdirname}/data.csv"
         valid_data = f"{tmpdirname}/valid_data.csv"
         test_data = f"{tmpdirname}/test_data.csv"
-        output_dirname = f"{tmpdirname}/results"
+        output_dirname = f"{tmpdirname}/results_{datetime.now()}"

         d.to_csv(input_data, index=False)
         v.to_csv(valid_data, index=False)

@@ -201,7 +202,7 @@ def test_load_datasets(model, data_dict):
 def test_load_datasets(model, data_dict):
     with tempfile.TemporaryDirectory() as tmpdirname:
         anomaly_yaml_filename = f"{tmpdirname}/anomaly.yaml"
-        output_dirname = f"{tmpdirname}/results"
+        output_dirname = f"{tmpdirname}/results_{datetime.now()}"

         yaml_i = deepcopy(TEMPLATE_YAML)
         yaml_i["spec"]["model"] = model

@@ -225,9 +226,3 @@ def test_load_datasets(model, data_dict):
         # oultiers = pd.read_csv(f"{output_dirname}/anomaly.csv")
         # print(oultiers)
         assert os.path.exists(f"{output_dirname}/report.html"), "Report not generated."
-
-        # if TEMPLATE_YAML["spec"]["generate_explanations"]:
-        #     glb_expl = pd.read_csv(f"{tmpdirname}/results/global_explanation.csv")
-        #     print(glb_expl)
-        #     loc_expl = pd.read_csv(f"{tmpdirname}/results/local_explanation.csv")
-        #     print(loc_expl)
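Every change in this test file follows the same pattern: the results directory gets a per-run timestamp suffix, so output paths stay unique across runs and the report.html assertion checks files written by the current run. A tiny standalone sketch of the pattern; the paths are illustrative only:

import tempfile
from datetime import datetime

with tempfile.TemporaryDirectory() as tmpdirname:
    # Unique per invocation, e.g. ".../results_2024-05-01 12:00:00.000000"
    output_dirname = f"{tmpdirname}/results_{datetime.now()}"
    print(output_dirname)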
