Add AutoMLX internal explainability as an explainability mode

codeloop · codeloop · commit 6a7657496667 · 2024-12-13T08:08:30.000Z
diff --git a/ads/opctl/operator/lowcode/forecast/const.py b/ads/opctl/operator/lowcode/forecast/const.py
@@ -27,10 +27,12 @@ class SpeedAccuracyMode(str, metaclass=ExtendedEnumMeta):
     HIGH_ACCURACY = "HIGH_ACCURACY"
     BALANCED = "BALANCED"
     FAST_APPROXIMATE = "FAST_APPROXIMATE"
+    AUTOMLX = "AUTOMLX"
     ratio = {}
     ratio[HIGH_ACCURACY] = 1  # 100 % data used for generating explanations
     ratio[BALANCED] = 0.5  # 50 % data used for generating explanations
     ratio[FAST_APPROXIMATE] = 0  # constant
+    ratio[AUTOMLX] = 0  # constant
 
 
 class SupportedMetrics(str, metaclass=ExtendedEnumMeta):
diff --git a/ads/opctl/operator/lowcode/forecast/model/automlx.py b/ads/opctl/operator/lowcode/forecast/model/automlx.py
@@ -17,6 +17,7 @@
 from ads.opctl.operator.lowcode.forecast.const import (
     AUTOMLX_METRIC_MAP,
     ForecastOutputColumns,
+    SpeedAccuracyMode,
     SupportedModels,
 )
 from ads.opctl.operator.lowcode.forecast.utils import _label_encode_dataframe
@@ -239,27 +240,27 @@ def _generate_report(self):
                 # If the key is present, call the "explain_model" method
                 self.explain_model()
 
-                # Convert the global explanation data to a DataFrame
-                global_explanation_df = pd.DataFrame(self.global_explanation)
+                global_explanation_section = None
+                if self.spec.explanations_accuracy_mode != SpeedAccuracyMode.AUTOMLX:
+                    # Convert the global explanation data to a DataFrame
+                    global_explanation_df = pd.DataFrame(self.global_explanation)
 
-                self.formatted_global_explanation = (
-                    global_explanation_df / global_explanation_df.sum(axis=0) * 100
-                )
-                self.formatted_global_explanation = (
-                    self.formatted_global_explanation.rename(
+                    self.formatted_global_explanation = (
+                        global_explanation_df / global_explanation_df.sum(axis=0) * 100
+                    )
+                    self.formatted_global_explanation = self.formatted_global_explanation.rename(
                         {self.spec.datetime_column.name: ForecastOutputColumns.DATE},
                         axis=1,
                     )
-                )
 
-                # Create a markdown section for the global explainability
-                global_explanation_section = rc.Block(
-                    rc.Heading("Global Explanation of Models", level=2),
-                    rc.Text(
-                        "The following tables provide the feature attribution for the global explainability."
-                    ),
-                    rc.DataTable(self.formatted_global_explanation, index=True),
-                )
+                    # Create a markdown section for the global explainability
+                    global_explanation_section = rc.Block(
+                        rc.Heading("Global Explanation of Models", level=2),
+                        rc.Text(
+                            "The following tables provide the feature attribution for the global explainability."
+                        ),
+                        rc.DataTable(self.formatted_global_explanation, index=True),
+                    )
 
                 aggregate_local_explanations = pd.DataFrame()
                 for s_id, local_ex_df in self.local_explanation.items():
@@ -284,8 +285,11 @@ def _generate_report(self):
                 )
 
                 # Append the global explanation text and section to the "other_sections" list
+                if global_explanation_section:
+                    other_sections.append(global_explanation_section)
+
+                # Append the local explanation text and section to the "other_sections" list
                 other_sections = other_sections + [
-                    global_explanation_section,
                     local_explanation_section,
                 ]
             except Exception as e:
diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py
@@ -47,7 +47,7 @@
     SpeedAccuracyMode,
     SupportedMetrics,
     SupportedModels,
-    BACKTEST_REPORT_NAME
+    BACKTEST_REPORT_NAME,
 )
 from ..operator_config import ForecastOperatorConfig, ForecastOperatorSpec
 from .forecast_datasets import ForecastDatasets
@@ -259,7 +259,11 @@ def generate_report(self):
                 output_dir = self.spec.output_directory.url
                 file_path = f"{output_dir}/{BACKTEST_REPORT_NAME}"
                 if self.spec.model == AUTO_SELECT:
-                    backtest_sections.append(rc.Heading("Auto-Select Backtesting and Performance Metrics", level=2))
+                    backtest_sections.append(
+                        rc.Heading(
+                            "Auto-Select Backtesting and Performance Metrics", level=2
+                        )
+                    )
                     if not os.path.exists(file_path):
                         failure_msg = rc.Text(
                             "auto-select could not be executed. Please check the "
@@ -268,15 +272,23 @@ def generate_report(self):
                         backtest_sections.append(failure_msg)
                     else:
                         backtest_stats = pd.read_csv(file_path)
-                        model_metric_map = backtest_stats.drop(columns=['metric', 'backtest'])
-                        average_dict = {k: round(v, 4) for k, v in model_metric_map.mean().to_dict().items()}
+                        model_metric_map = backtest_stats.drop(
+                            columns=["metric", "backtest"]
+                        )
+                        average_dict = {
+                            k: round(v, 4)
+                            for k, v in model_metric_map.mean().to_dict().items()
+                        }
                         best_model = min(average_dict, key=average_dict.get)
                         summary_text = rc.Text(
                             f"Overall, the average {self.spec.metric} scores for the models are {average_dict}, with"
-                            f" {best_model} being identified as the top-performing model during backtesting.")
+                            f" {best_model} being identified as the top-performing model during backtesting."
+                        )
                         backtest_table = rc.DataTable(backtest_stats, index=True)
                         liner_plot = get_auto_select_plot(backtest_stats)
-                        backtest_sections.extend([backtest_table, summary_text, liner_plot])
+                        backtest_sections.extend(
+                            [backtest_table, summary_text, liner_plot]
+                        )
 
                 forecast_plots = []
                 if len(self.forecast_output.list_series_ids()) > 0:
@@ -643,6 +655,12 @@ def _save_model(self, output_dir, storage_options):
             "Please run `python3 -m pip install shap` to install the required dependencies for model explanation."
         ),
     )
+    @runtime_dependency(
+        module="automlx",
+        err_msg=(
+            "Please run `python3 -m pip install automlx` to install the required dependencies for model explanation."
+        ),
+    )
     def explain_model(self):
         """
         Generates an explanation for the model by using the SHAP (Shapley Additive exPlanations) library.
@@ -668,7 +686,44 @@ def explain_model(self):
         for s_id, data_i in self.datasets.get_data_by_series(
             include_horizon=False
         ).items():
-            if s_id in self.models:
+            if (
+                self.spec.model == SupportedModels.AutoMLX
+                and self.spec.explanations_accuracy_mode == SpeedAccuracyMode.AUTOMLX
+            ):
+                import automlx
+
+                explainer = automlx.MLExplainer(
+                    self.models[s_id],
+                    self.datasets.additional_data.get_data_for_series(series_id=s_id)
+                    .drop(self.spec.datetime_column.name, axis=1)
+                    .head(-self.spec.horizon)
+                    if self.spec.additional_data
+                    else None,
+                    pd.DataFrame(data_i[self.spec.target_column]),
+                    task="forecasting",
+                )
+
+                explanations = explainer.explain_prediction(
+                    X=self.datasets.additional_data.get_data_for_series(series_id=s_id)
+                    .drop(self.spec.datetime_column.name, axis=1)
+                    .tail(self.spec.horizon)
+                    if self.spec.additional_data
+                    else None,
+                    forecast_timepoints=list(range(self.spec.horizon + 1)),
+                )
+
+                explanations_df = pd.concat(
+                    [exp.to_dataframe() for exp in explanations]
+                )
+                explanations_df["row"] = explanations_df.groupby("Feature").cumcount()
+                explanations_df = explanations_df.pivot(
+                    index="row", columns="Feature", values="Attribution"
+                )
+                explanations_df = explanations_df.reset_index(drop=True)
+                # explanations_df[self.spec.datetime_column.name]=self.datasets.additional_data.get_data_for_series(series_id=s_id).tail(self.spec.horizon)[self.spec.datetime_column.name].reset_index(drop=True)
+                self.local_explanation[s_id] = explanations_df
+
+            elif s_id in self.models:
                 explain_predict_fn = self.get_explain_predict_fn(series_id=s_id)
                 data_trimmed = data_i.tail(
                     max(int(len(data_i) * ratio), 5)
@@ -699,6 +754,14 @@ def explain_model(self):
                     logger.warn(
                         "No explanations generated. Ensure that additional data has been provided."
                     )
+                elif (
+                    self.spec.model == SupportedModels.AutoMLX
+                    and self.spec.explanations_accuracy_mode
+                    == SpeedAccuracyMode.AUTOMLX
+                ):
+                    logger.warning(
+                        "Global explanations not available for AutoMLX models with inherent explainability"
+                    )
                 else:
                     self.global_explanation[s_id] = dict(
                         zip(
diff --git a/ads/opctl/operator/lowcode/forecast/schema.yaml b/ads/opctl/operator/lowcode/forecast/schema.yaml
@@ -332,6 +332,7 @@ spec:
         - HIGH_ACCURACY
         - BALANCED
         - FAST_APPROXIMATE
+        - AUTOMLX
 
     generate_report:
       type: boolean