Skip to content

Commit b065786

Browse files
authored
Merge branch 'main' into feature/odsc68406/amlx_global_explainer
2 parents 603e203 + 1e99804 commit b065786

File tree

7 files changed

+52
-35
lines changed

7 files changed

+52
-35
lines changed

ads/opctl/operator/lowcode/forecast/model/arima.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,10 @@ def _train_model(self, i, s_id, df, model_kwargs):
116116
lower_bound=self.get_horizon(forecast["yhat_lower"]).values,
117117
)
118118

119-
self.models[s_id] = model
119+
self.models[s_id] = {}
120+
self.models[s_id]["model"] = model
121+
self.models[s_id]["le"] = self.le[s_id]
122+
self.models[s_id]["predict_component_cols"] = X_pred.columns
120123

121124
params = vars(model).copy()
122125
for param in ["arima_res_", "endog_index_"]:
@@ -163,7 +166,7 @@ def _generate_report(self):
163166
sec5_text = rc.Heading("ARIMA Model Parameters", level=2)
164167
blocks = [
165168
rc.Html(
166-
m.summary().as_html(),
169+
m['model'].summary().as_html(),
167170
label=s_id if self.target_cat_col else None,
168171
)
169172
for i, (s_id, m) in enumerate(self.models.items())
@@ -251,7 +254,7 @@ def _generate_report(self):
251254
def get_explain_predict_fn(self, series_id):
252255
def _custom_predict(
253256
data,
254-
model=self.models[series_id],
257+
model=self.models[series_id]["model"],
255258
dt_column_name=self.datasets._datetime_column_name,
256259
target_col=self.original_target_column,
257260
):

ads/opctl/operator/lowcode/forecast/model/automlx.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,8 @@ def set_kwargs(self):
5656
)
5757
return model_kwargs_cleaned, time_budget
5858

59-
def preprocess(self, data): # TODO: re-use self.le for explanations
60-
_, df_encoded = _label_encode_dataframe(
59+
def preprocess(self, data, series_id): # TODO: re-use self.le for explanations
60+
self.le[series_id], df_encoded = _label_encode_dataframe(
6161
data,
6262
no_encode={self.spec.datetime_column.name, self.original_target_column},
6363
)
@@ -125,7 +125,7 @@ def _build_model(self) -> pd.DataFrame:
125125
self.forecast_output.init_series_output(
126126
series_id=s_id, data_at_series=df
127127
)
128-
data = self.preprocess(df)
128+
data = self.preprocess(df, s_id)
129129
data_i = self.drop_horizon(data)
130130
X_pred = self.get_horizon(data).drop(target, axis=1)
131131

@@ -157,7 +157,9 @@ def _build_model(self) -> pd.DataFrame:
157157
target
158158
].values
159159

160-
self.models[s_id] = model
160+
self.models[s_id] = {}
161+
self.models[s_id]["model"] = model
162+
self.models[s_id]["le"] = self.le[s_id]
161163

162164
# In case of Naive model, model.forecast function call does not return confidence intervals.
163165
if f"{target}_ci_upper" not in summary_frame:
@@ -218,7 +220,8 @@ def _generate_report(self):
218220
other_sections = []
219221

220222
if len(self.models) > 0:
221-
for s_id, m in models.items():
223+
for s_id, artifacts in models.items():
224+
m = artifacts["model"]
222225
selected_models[s_id] = {
223226
"series_id": s_id,
224227
"selected_model": m.selected_model_,
@@ -326,7 +329,7 @@ def _generate_report(self):
326329
)
327330

328331
def get_explain_predict_fn(self, series_id):
329-
selected_model = self.models[series_id]
332+
selected_model = self.models[series_id]["model"]
330333

331334
# If training date, use method below. If future date, use forecast!
332335
def _custom_predict_fn(
@@ -344,12 +347,12 @@ def _custom_predict_fn(
344347
data[dt_column_name] = seconds_to_datetime(
345348
data[dt_column_name], dt_format=self.spec.datetime_column.format
346349
)
347-
data = self.preprocess(data)
350+
data = self.preprocess(data, series_id)
348351
horizon_data = horizon_data.drop(target_col, axis=1)
349352
horizon_data[dt_column_name] = seconds_to_datetime(
350353
horizon_data[dt_column_name], dt_format=self.spec.datetime_column.format
351354
)
352-
horizon_data = self.preprocess(horizon_data)
355+
horizon_data = self.preprocess(horizon_data, series_id)
353356

354357
rows = []
355358
for i in range(data.shape[0]):

ads/opctl/operator/lowcode/forecast/model/base_model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -802,7 +802,7 @@ def local_explainer(self, kernel_explainer, series_id, datetime_col_name) -> Non
802802
def get_explain_predict_fn(self, series_id, fcst_col_name="yhat"):
803803
def _custom_predict(
804804
data,
805-
model=self.models[series_id],
805+
model=self.models[series_id]["model"],
806806
dt_column_name=self.datasets._datetime_column_name,
807807
):
808808
"""

ads/opctl/operator/lowcode/forecast/model/neuralprophet.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -172,8 +172,10 @@ def _train_model(self, i, s_id, df, model_kwargs):
172172
).values,
173173
)
174174

175-
self.models[s_id] = model
176175
self.trainers[s_id] = model.trainer
176+
self.models[s_id] = {}
177+
self.models[s_id]["model"] = model
178+
self.models[s_id]["le"] = self.le[s_id]
177179

178180
self.model_parameters[s_id] = {
179181
"framework": SupportedModels.NeuralProphet,
@@ -355,7 +357,8 @@ def _generate_report(self):
355357

356358
sec5_text = rc.Heading("Neural Prophet Model Parameters", level=2)
357359
model_states = []
358-
for s_id, m in self.models.items():
360+
for s_id, artifacts in self.models.items():
361+
m = artifacts["model"]
359362
model_states.append(
360363
pd.Series(
361364
m.state_dict(),

ads/opctl/operator/lowcode/forecast/model/prophet.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,10 @@ def _train_model(self, i, series_id, df, model_kwargs):
108108
upper_bound=self.get_horizon(forecast["yhat_upper"]).values,
109109
lower_bound=self.get_horizon(forecast["yhat_lower"]).values,
110110
)
111-
self.models[series_id] = model
111+
112+
self.models[series_id] = {}
113+
self.models[series_id]["model"] = model
114+
self.models[series_id]["le"] = self.le[series_id]
112115

113116
params = vars(model).copy()
114117
for param in ["history", "history_dates", "stan_fit"]:
@@ -252,7 +255,7 @@ def _generate_report(self):
252255
all_sections = []
253256
if len(series_ids) > 0:
254257
sec1 = _select_plot_list(
255-
lambda s_id: self.models[s_id].plot(
258+
lambda s_id: self.models[s_id]["model"].plot(
256259
self.outputs[s_id], include_legend=True
257260
),
258261
series_ids=series_ids,
@@ -267,7 +270,7 @@ def _generate_report(self):
267270
)
268271

269272
sec2 = _select_plot_list(
270-
lambda s_id: self.models[s_id].plot_components(self.outputs[s_id]),
273+
lambda s_id: self.models[s_id]["model"].plot_components(self.outputs[s_id]),
271274
series_ids=series_ids,
272275
target_category_column=self.target_cat_col
273276
)
@@ -276,11 +279,11 @@ def _generate_report(self):
276279
)
277280

278281
sec3_figs = {
279-
s_id: self.models[s_id].plot(self.outputs[s_id]) for s_id in series_ids
282+
s_id: self.models[s_id]["model"].plot(self.outputs[s_id]) for s_id in series_ids
280283
}
281284
for s_id in series_ids:
282285
add_changepoints_to_plot(
283-
sec3_figs[s_id].gca(), self.models[s_id], self.outputs[s_id]
286+
sec3_figs[s_id].gca(), self.models[s_id]["model"], self.outputs[s_id]
284287
)
285288
sec3 = _select_plot_list(
286289
lambda s_id: sec3_figs[s_id],
@@ -294,7 +297,7 @@ def _generate_report(self):
294297
sec5_text = rc.Heading("Prophet Model Seasonality Components", level=2)
295298
model_states = []
296299
for s_id in series_ids:
297-
m = self.models[s_id]
300+
m = self.models[s_id]["model"]
298301
model_states.append(
299302
pd.Series(
300303
m.seasonalities,

ads/opctl/operator/lowcode/forecast/whatifserve/score.py

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -151,34 +151,42 @@ def get_forecast(future_df, model_name, series_id, model_object, date_col, targe
151151
pred_obj = model_object.predict(future_regressor=future_reg)
152152
return pred_obj.forecast[series_id].tolist()
153153
elif model_name == SupportedModels.Prophet and series_id in model_object:
154-
model = model_object[series_id]
154+
model = model_object[series_id]['model']
155+
label_encoder = model_object[series_id]['le']
155156
processed = future_df.rename(columns={date_col_name: 'ds', target_column: 'y'})
156-
forecast = model.predict(processed)
157+
encoded_df = label_encoder.transform(processed)
158+
forecast = model.predict(encoded_df)
157159
return forecast['yhat'].tolist()
158160
elif model_name == SupportedModels.NeuralProphet and series_id in model_object:
159-
model = model_object[series_id]
161+
model = model_object[series_id]['model']
162+
label_encoder = model_object[series_id]['le']
160163
model.restore_trainer()
161164
accepted_regressors = list(model.config_regressors.regressors.keys())
162165
data = future_df.rename(columns={date_col_name: 'ds', target_column: 'y'})
163-
future = data[accepted_regressors + ["ds"]].reset_index(drop=True)
166+
encoded_df = label_encoder.transform(data)
167+
future = encoded_df[accepted_regressors + ["ds"]].reset_index(drop=True)
164168
future["y"] = None
165169
forecast = model.predict(future)
166170
return forecast['yhat1'].tolist()
167171
elif model_name == SupportedModels.Arima and series_id in model_object:
168-
model = model_object[series_id]
169-
future_df = future_df.set_index(date_col_name)
170-
x_pred = future_df.drop(target_cat_col, axis=1)
172+
model = model_object[series_id]['model']
173+
label_encoder = model_object[series_id]['le']
174+
predict_cols = model_object[series_id]["predict_component_cols"]
175+
encoded_df = label_encoder.transform(future_df)
176+
x_pred = encoded_df.set_index(date_col_name)
177+
x_pred = x_pred.drop(target_cat_col, axis=1)
171178
yhat, conf_int = model.predict(
172179
n_periods=horizon,
173-
X=x_pred,
180+
X=x_pred[predict_cols],
174181
return_conf_int=True
175182
)
176183
yhat_clean = pd.DataFrame(yhat, index=yhat.index, columns=["yhat"])
177184
return yhat_clean['yhat'].tolist()
178185
elif model_name == SupportedModels.AutoMLX and series_id in model_object:
179-
# automlx model
180-
model = model_object[series_id]
181-
x_pred = future_df.drop(target_cat_col, axis=1)
186+
model = model_object[series_id]['model']
187+
label_encoder = model_object[series_id]['le']
188+
encoded_df = label_encoder.transform(future_df)
189+
x_pred = encoded_df.drop(target_cat_col, axis=1)
182190
x_pred = x_pred.set_index(date_col_name)
183191
forecast = model.forecast(
184192
X=x_pred,

tests/operators/forecast/test_errors.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -875,10 +875,7 @@ def test_what_if_analysis(operator_setup, model):
875875
historical_filtered = historical_data[historical_data['Date'] > "2013-03-01"]
876876
additional_data = pd.read_csv(additional_data_path, parse_dates=["Date"])
877877
add_filtered = additional_data[additional_data['Date'] > "2013-03-01"]
878-
numeric_columns = add_filtered.select_dtypes(include=['number', 'object', 'datetime64'])
879-
non_constant_columns = numeric_columns.columns[(numeric_columns != numeric_columns.iloc[0]).any()]
880-
df_non_constant = numeric_columns[non_constant_columns.union(['Store'])]
881-
df_non_constant.to_csv(f'{additional_test_path}', index=False)
878+
add_filtered.to_csv(f'{additional_test_path}', index=False)
882879
historical_filtered.to_csv(f'{historical_test_path}', index=False)
883880

884881
yaml_i, output_data_path = populate_yaml(

0 commit comments

Comments
 (0)