
Commit 724f423

Merge branch 'main' into add_min_max_prophet
2 parents 5225e65 + c5edd1c

3 files changed: +77 -39 lines changed

ads/opctl/operator/lowcode/forecast/model/automlx.py

Lines changed: 16 additions & 3 deletions
@@ -469,14 +469,27 @@ def explain_model(self):
                     index="row", columns="Feature", values="Attribution"
                 )
                 explanations_df = explanations_df.reset_index(drop=True)
-
+                explanations_df[ForecastOutputColumns.DATE] = (
+                    self.datasets.get_horizon_at_series(
+                        s_id=s_id
+                    )[self.spec.datetime_column.name].reset_index(drop=True)
+                )
                 # Store the explanations in the local_explanation dictionary
                 self.local_explanation[s_id] = explanations_df
 
                 self.global_explanation[s_id] = dict(
                     zip(
-                        self.local_explanation[s_id].columns,
-                        np.nanmean(np.abs(self.local_explanation[s_id]), axis=0),
+                        self.local_explanation[s_id]
+                        .drop(ForecastOutputColumns.DATE, axis=1)
+                        .columns,
+                        np.nanmean(
+                            np.abs(
+                                self.local_explanation[s_id].drop(
+                                    ForecastOutputColumns.DATE, axis=1
+                                )
+                            ),
+                            axis=0,
+                        ),
                     )
                 )
             else:
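
Taken together, this hunk attaches the forecast-horizon dates to each series' attribution frame and then excludes that date column when averaging absolute attributions into the global explanation. Below is a minimal, self-contained sketch of that pattern; the DataFrame contents and the literal "Date" column name are invented for illustration (in the operator the name comes from ForecastOutputColumns.DATE):

import numpy as np
import pandas as pd

DATE_COL = "Date"  # illustrative stand-in for ForecastOutputColumns.DATE

# Invented per-row feature attributions for one series
explanations_df = pd.DataFrame(
    {"temp": [0.4, -0.2, 0.1], "holiday": [0.0, 0.3, -0.1]}
)

# Attach the horizon dates, as the hunk does via get_horizon_at_series()
horizon_dates = pd.Series(pd.date_range("2024-01-01", periods=3, freq="D"))
explanations_df[DATE_COL] = horizon_dates.reset_index(drop=True)

# Global explanation: mean absolute attribution per feature, with the date
# column dropped so it is not averaged as if it were a feature
features_only = explanations_df.drop(DATE_COL, axis=1)
global_explanation = dict(
    zip(features_only.columns, np.nanmean(np.abs(features_only), axis=0))
)
print(global_explanation)  # {'temp': 0.233..., 'holiday': 0.133...}

The same date-attachment step appears again in base_model.py's local_explainer below.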

ads/opctl/operator/lowcode/forecast/model/base_model.py

Lines changed: 59 additions & 34 deletions
@@ -46,6 +46,7 @@
     AUTO_SELECT,
     BACKTEST_REPORT_NAME,
     SUMMARY_METRICS_HORIZON_LIMIT,
+    ForecastOutputColumns,
     SpeedAccuracyMode,
     SupportedMetrics,
     SupportedModels,
@@ -743,43 +744,60 @@ def explain_model(self):
             include_horizon=False
         ).items():
             if s_id in self.models:
-                explain_predict_fn = self.get_explain_predict_fn(series_id=s_id)
-                data_trimmed = data_i.tail(
-                    max(int(len(data_i) * ratio), 5)
-                ).reset_index(drop=True)
-                data_trimmed[datetime_col_name] = data_trimmed[datetime_col_name].apply(
-                    lambda x: x.timestamp()
-                )
-
-                # Explainer fails when boolean columns are passed
-
-                _, data_trimmed_encoded = _label_encode_dataframe(
-                    data_trimmed,
-                    no_encode={datetime_col_name, self.original_target_column},
-                )
-
-                kernel_explnr = PermutationExplainer(
-                    model=explain_predict_fn, masker=data_trimmed_encoded
-                )
-                kernel_explnr_vals = kernel_explnr.shap_values(data_trimmed_encoded)
-                exp_end_time = time.time()
-                global_ex_time = global_ex_time + exp_end_time - exp_start_time
-                self.local_explainer(
-                    kernel_explnr, series_id=s_id, datetime_col_name=datetime_col_name
-                )
-                local_ex_time = local_ex_time + time.time() - exp_end_time
+                try:
+                    explain_predict_fn = self.get_explain_predict_fn(series_id=s_id)
+                    data_trimmed = data_i.tail(
+                        max(int(len(data_i) * ratio), 5)
+                    ).reset_index(drop=True)
+                    data_trimmed[datetime_col_name] = data_trimmed[
+                        datetime_col_name
+                    ].apply(lambda x: x.timestamp())
+
+                    # Explainer fails when boolean columns are passed
+
+                    _, data_trimmed_encoded = _label_encode_dataframe(
+                        data_trimmed,
+                        no_encode={datetime_col_name, self.original_target_column},
+                    )
 
-                if not len(kernel_explnr_vals):
-                    logger.warning(
-                        "No explanations generated. Ensure that additional data has been provided."
+                    kernel_explnr = PermutationExplainer(
+                        model=explain_predict_fn, masker=data_trimmed_encoded
                     )
-                else:
-                    self.global_explanation[s_id] = dict(
-                        zip(
-                            data_trimmed.columns[1:],
-                            np.average(np.absolute(kernel_explnr_vals[:, 1:]), axis=0),
-                        )
+                    kernel_explnr_vals = kernel_explnr.shap_values(data_trimmed_encoded)
+                    exp_end_time = time.time()
+                    global_ex_time = global_ex_time + exp_end_time - exp_start_time
+                    self.local_explainer(
+                        kernel_explnr,
+                        series_id=s_id,
+                        datetime_col_name=datetime_col_name,
                     )
+                    local_ex_time = local_ex_time + time.time() - exp_end_time
+
+                    if not len(kernel_explnr_vals):
+                        logger.warning(
+                            "No explanations generated. Ensure that additional data has been provided."
+                        )
+                    else:
+                        self.global_explanation[s_id] = dict(
+                            zip(
+                                data_trimmed.columns[1:],
+                                np.average(
+                                    np.absolute(kernel_explnr_vals[:, 1:]), axis=0
+                                ),
+                            )
+                        )
+                except Exception as e:
+                    if s_id in self.errors_dict:
+                        self.errors_dict[s_id]["explainer_error"] = str(e)
+                        self.errors_dict[s_id]["explainer_error_trace"] = (
+                            traceback.format_exc()
+                        )
+                    else:
+                        self.errors_dict[s_id] = {
+                            "model_name": self.spec.model,
+                            "explainer_error": str(e),
+                            "explainer_error_trace": traceback.format_exc(),
+                        }
             else:
                 logger.warning(
                     f"Skipping explanations for {s_id}, as forecast was not generated."
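
The substantive change in this hunk is wrapping the per-series explainer in try/except, so one failing series no longer aborts explanations for the rest; the error message and traceback are recorded under that series id in self.errors_dict instead. A hedged, standalone sketch of that bookkeeping pattern (errors_dict, model_name, and record_explainer_error are illustrative stand-ins, not the operator's API):

import traceback

errors_dict = {}
model_name = "prophet"  # stands in for self.spec.model

def record_explainer_error(s_id, exc):
    # Merge the explainer failure into any existing error entry for this series
    if s_id in errors_dict:
        errors_dict[s_id]["explainer_error"] = str(exc)
        errors_dict[s_id]["explainer_error_trace"] = traceback.format_exc()
    else:
        errors_dict[s_id] = {
            "model_name": model_name,
            "explainer_error": str(exc),
            "explainer_error_trace": traceback.format_exc(),
        }

for s_id in ["series_a", "series_b"]:
    try:
        raise RuntimeError(f"explainer failed for {s_id}")  # simulated failure
    except Exception as e:
        record_explainer_error(s_id, e)

print(sorted(errors_dict))  # ['series_a', 'series_b']

Merging into an existing entry preserves any earlier error already recorded for the same series, such as a model-fitting failure.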
@@ -816,6 +834,13 @@ def local_explainer(self, kernel_explainer, series_id, datetime_col_name) -> None:
         local_kernel_explnr_df = pd.DataFrame(
             local_kernel_explnr_vals, columns=data.columns
         )
+
+        # Add date column to local explanation DataFrame
+        local_kernel_explnr_df[ForecastOutputColumns.DATE] = (
+            self.datasets.get_horizon_at_series(
+                s_id=series_id
+            )[self.spec.datetime_column.name].reset_index(drop=True)
+        )
         self.local_explanation[series_id] = local_kernel_explnr_df
 
     def get_explain_predict_fn(self, series_id, fcst_col_name="yhat"):

docs/source/user_guide/operators/anomaly_detection_operator/index.rst

Lines changed: 2 additions & 2 deletions
@@ -17,9 +17,9 @@ The Anomaly Detection Operator accepts a dataset with:
 * (Optionally) 1 or more seires columns (such that the target is indexed by datetime and series)
 * (Optionall) An arbitrary number of additional variables
 
-Besides this input data, the user can also specify validation data, if available. Validation data should have all the columns of the input data plus a binary column titled "anomaly". The "anomaly" column should be -1 for anomalies and 1 for normal rows.
+Besides this input data, the user can also specify validation data, if available. Validation data should have all the columns of the input data plus a binary column titled "anomaly". The "anomaly" column should be 1 for anomalies and 0 for normal rows.
 
-Finally the user can provide "test_data" in order to recieve test metrics and evaluate the Operator's performance more easily. Test data should indexed by date and (optionally) series. Test data should have a -1 for anomalous rows and 1 for normal rows.
+Finally the user can provide "test_data" in order to recieve test metrics and evaluate the Operator's performance more easily. Test data should indexed by date and (optionally) series. Test data should have a 1 for anomalous rows and 0 for normal rows.
 
 **Multivariate vs. Univariate Anomaly Detection**
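
The documentation fix above inverts the label convention: anomalous rows are now marked 1 and normal rows 0, in both validation and test data. A small illustrative frame matching the corrected convention (the column names other than "anomaly" are examples, not a required schema):

import pandas as pd

validation_data = pd.DataFrame(
    {
        "timestamp": pd.date_range("2024-01-01", periods=4, freq="h"),
        "value": [10.1, 9.8, 55.0, 10.3],
        "anomaly": [0, 0, 1, 0],  # 1 = anomaly, 0 = normal
    }
)
print(validation_data)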