Update Prophet explanations: build them from forecast component columns and record explainer errors

ahosler · ahosler · commit 48b27b3c2159 · 2025-03-16T15:41:54.000Z
diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py
@@ -635,7 +635,9 @@ def _save_report(
         )
         if self.errors_dict:
             write_data(
-                data=pd.DataFrame.from_dict(self.errors_dict),
+                data=pd.DataFrame(
+                    self.errors_dict, index=np.arange(len(self.errors_dict.keys()))
+                ),
                 filename=os.path.join(
                     unique_output_dir, self.spec.errors_dict_filename
                 ),
diff --git a/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py b/ads/opctl/operator/lowcode/forecast/model/neuralprophet.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import logging
@@ -153,10 +153,8 @@ def _train_model(self, i, s_id, df, model_kwargs):
             cols_to_read = filter(
                 lambda x: x.startswith("future_regressor"), forecast.columns
             )
-            self.explanations_info[s_id] = forecast[cols_to_read]
-            self.explanations_info[s_id]["Date"] = forecast["ds"]
-            self.explanations_info[s_id] = self.explanations_info[s_id].set_index(
-                "Date"
+            self.explanations_info[s_id] = (
+                forecast[cols_to_read].rename({"ds": "Date"}, axis=1).set_index("Date")
             )
 
             self.outputs[s_id] = forecast
diff --git a/ads/opctl/operator/lowcode/forecast/model/prophet.py b/ads/opctl/operator/lowcode/forecast/model/prophet.py
@@ -22,7 +22,6 @@
 from ..const import (
     DEFAULT_TRIALS,
     PROPHET_INTERNAL_DATE_COL,
-    ForecastOutputColumns,
     SupportedModels,
 )
 from .base_model import ForecastOperatorBaseModel
@@ -123,6 +122,14 @@ def _train_model(self, i, series_id, df, model_kwargs):
                 upper_bound=self.get_horizon(forecast["yhat_upper"]).values,
                 lower_bound=self.get_horizon(forecast["yhat_lower"]).values,
             )
+            # Keep the forecast's component columns: exclude confidence-interval bounds (*_lower/*_upper) and drop the last remaining column, assumed to be yhat ([:-1])
+            core_columns = forecast.columns[
+                ~forecast.columns.str.endswith("_lower")
+                & ~forecast.columns.str.endswith("_upper")
+            ][:-1]
+            self.explanations_info[series_id] = (
+                forecast[core_columns].rename({"ds": "Date"}, axis=1).set_index("Date")
+            )
 
             self.models[series_id] = {}
             self.models[series_id]["model"] = model
@@ -151,6 +158,7 @@ def _build_model(self) -> pd.DataFrame:
         full_data_dict = self.datasets.get_data_by_series()
         self.models = {}
         self.outputs = {}
+        self.explanations_info = {}
         self.additional_regressors = self.datasets.get_additional_data_column_names()
         model_kwargs = self.set_kwargs()
         self.forecast_output = ForecastOutput(
@@ -257,6 +265,25 @@ def objective(trial):
         model_kwargs_i = study.best_params
         return model_kwargs_i
 
+    def explain_model(self):
+        self.local_explanation = {}
+        global_expl = []
+
+        for s_id, expl_df in self.explanations_info.items():
+            # Local Expl
+            self.local_explanation[s_id] = self.get_horizon(expl_df)
+            self.local_explanation[s_id]["Series"] = s_id
+            self.local_explanation[s_id].index.rename(self.dt_column_name, inplace=True)
+            # Global Expl
+            g_expl = self.drop_horizon(expl_df).mean()
+            g_expl.name = s_id
+            global_expl.append(g_expl)
+        self.global_explanation = pd.concat(global_expl, axis=1)
+        self.formatted_global_explanation = (
+            self.global_explanation / self.global_explanation.sum(axis=0) * 100
+        )
+        self.formatted_local_explanation = pd.concat(self.local_explanation.values())
+
     def _generate_report(self):
         import report_creator as rc
         from prophet.plot import add_changepoints_to_plot
@@ -335,22 +362,6 @@ def _generate_report(self):
                 # If the key is present, call the "explain_model" method
                 self.explain_model()
 
-                # Convert the global explanation data to a DataFrame
-                global_explanation_df = pd.DataFrame(self.global_explanation)
-
-                self.formatted_global_explanation = (
-                    global_explanation_df / global_explanation_df.sum(axis=0) * 100
-                )
-
-                aggregate_local_explanations = pd.DataFrame()
-                for s_id, local_ex_df in self.local_explanation.items():
-                    local_ex_df_copy = local_ex_df.copy()
-                    local_ex_df_copy[ForecastOutputColumns.SERIES] = s_id
-                    aggregate_local_explanations = pd.concat(
-                        [aggregate_local_explanations, local_ex_df_copy], axis=0
-                    )
-                self.formatted_local_explanation = aggregate_local_explanations
-
                 if not self.target_cat_col:
                     self.formatted_global_explanation = (
                         self.formatted_global_explanation.rename(
@@ -364,7 +375,7 @@ def _generate_report(self):
 
                 # Create a markdown section for the global explainability
                 global_explanation_section = rc.Block(
-                    rc.Heading("Global Explanation of Models", level=2),
+                    rc.Heading("Global Explainability", level=2),
                     rc.Text(
                         "The following tables provide the feature attribution for the global explainability."
                     ),
@@ -373,7 +384,7 @@ def _generate_report(self):
 
                 blocks = [
                     rc.DataTable(
-                        local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
+                        local_ex_df.drop("Series", axis=1),
                         label=s_id if self.target_cat_col else None,
                         index=True,
                     )
@@ -393,6 +404,8 @@ def _generate_report(self):
                 # Do not fail the whole run due to explanations failure
                 logger.warning(f"Failed to generate Explanations with error: {e}.")
                 logger.debug(f"Full Traceback: {traceback.format_exc()}")
+                self.errors_dict["explainer_error"] = str(e)
+                self.errors_dict["explainer_error_error"] = traceback.format_exc()
 
         model_description = rc.Text(
             """Prophet is a procedure for forecasting time series data based on an additive model where non-linear trends are fit with yearly, weekly, and daily seasonality, plus holiday effects. It works best with time series that have strong seasonal effects and several seasons of historical data. Prophet is robust to missing data and shifts in the trend, and typically handles outliers well."""
diff --git a/tests/operators/forecast/test_errors.py b/tests/operators/forecast/test_errors.py
@@ -961,6 +961,7 @@ def test_generate_files(operator_setup, model):
     yaml_i["spec"]["generate_forecast_file"] = False
     yaml_i["spec"]["generate_metrics_file"] = False
     yaml_i["spec"]["generate_explanations"] = True
+    yaml_i["spec"]["model_kwargs"] = {"min": 0, "max": 20}
 
     df = pd.concat([HISTORICAL_DATETIME_COL[:15], TARGET_COL[:15]], axis=1)
     df_add = pd.concat([HISTORICAL_DATETIME_COL[:18], ADD_COLS[:18]], axis=1)
@@ -971,8 +972,7 @@ def test_generate_files(operator_setup, model):
     files = os.listdir(operator_setup)
     if "errors.json" in files:
         with open(os.path.join(operator_setup, "errors.json")) as f:
-            print(f"Errors in build! {f.read()}")
-            assert False, "Failed due to errors.json being created"
+            assert False, f"Failed due to errors.json being created: {f.read()}"
     assert "report.html" in files, "Failed to generate report"
     assert (
         "forecast.csv" not in files
@@ -988,7 +988,6 @@ def test_generate_files(operator_setup, model):
     ), "Generated metrics file, but `generate_explanation_files` was set False"
     assert not results.get_forecast().empty
     assert not results.get_metrics().empty
-    print(f"global expl: {results.get_global_explanations()}")
     assert not results.get_global_explanations().empty
     assert not results.get_local_explanations().empty