Skip to content

Commit 7265aac

Browse files
authored
Forecasting: target_col removal from outputs, fixed prophet and autots bugs (#537)
2 parents 1f9d0fa + 68dadd3 commit 7265aac

File tree

7 files changed

+75
-44
lines changed

7 files changed

+75
-44
lines changed

ads/opctl/operator/lowcode/forecast/model/arima.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ def _train_model(self, i, target, df):
128128
for param in ['arima_res_', 'endog_index_']:
129129
if param in params:
130130
params.pop(param)
131-
self.model_parameters[target] = {
131+
self.model_parameters[utils.convert_target(target, self.original_target_column)] = {
132132
"framework": SupportedModels.Arima,
133133
**params,
134134
}
@@ -197,7 +197,7 @@ def _generate_report(self):
197197

198198
sec5_text = dp.Text(f"## ARIMA Model Parameters")
199199
blocks = [
200-
dp.HTML(m.summary().as_html(), label=target)
200+
dp.HTML(m.summary().as_html(), label=utils.convert_target(target, self.original_target_column))
201201
for i, (target, m) in enumerate(self.models.items())
202202
]
203203
sec5 = dp.Select(blocks=blocks) if len(blocks) > 1 else blocks[0]
@@ -242,7 +242,7 @@ def _generate_report(self):
242242
blocks = [
243243
dp.DataTable(
244244
local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
245-
label=s_id,
245+
label=utils.convert_target(s_id, self.original_target_column),
246246
)
247247
for s_id, local_ex_df in self.local_explanation.items()
248248
]

ads/opctl/operator/lowcode/forecast/model/automlx.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ def _build_model(self) -> pd.DataFrame:
164164
outputs[target] = summary_frame
165165
# outputs_legacy[target] = summary_frame
166166

167-
self.model_parameters[target] = {
167+
self.model_parameters[utils.convert_target(target, self.original_target_column)] = {
168168
"framework": SupportedModels.AutoMLX,
169169
"score_metric": model.score_metric,
170170
"random_state": model.random_state,
@@ -250,7 +250,7 @@ def _generate_report(self):
250250
models = self.models
251251
for i, (target, df) in enumerate(self.full_data_dict.items()):
252252
selected_models[target] = {
253-
"series_id": target,
253+
"series_id": utils.convert_target(target, self.original_target_column),
254254
"selected_model": models[target].selected_model_,
255255
"model_params": models[target].selected_model_params_,
256256
}
@@ -304,7 +304,7 @@ def _generate_report(self):
304304
blocks = [
305305
dp.DataTable(
306306
local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
307-
label=s_id,
307+
label=utils.convert_target(s_id, self.original_target_column),
308308
)
309309
for s_id, local_ex_df in self.local_explanation.items()
310310
]

ads/opctl/operator/lowcode/forecast/model/autots.py

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ def _build_model(self) -> pd.DataFrame:
7878
drop_data_older_than_periods=self.spec.model_kwargs.get(
7979
"drop_data_older_than_periods", None
8080
),
81-
model_list=self.spec.model_kwargs.get("model_list", "fast_parallel"),
81+
model_list=self.spec.model_kwargs.get("model_list", "fast_parallel"),
8282
transformer_list=self.spec.model_kwargs.get("transformer_list", "auto"),
8383
transformer_max_depth=self.spec.model_kwargs.get(
8484
"transformer_max_depth", 6
@@ -137,10 +137,7 @@ def _build_model(self) -> pd.DataFrame:
137137
columns="series_id",
138138
values=list(
139139
self.original_additional_data.set_index(
140-
[
141-
self.spec.target_category_columns[0],
142-
self.spec.datetime_column.name,
143-
]
140+
self.spec.target_category_columns + [self.spec.datetime_column.name]
144141
).columns
145142
),
146143
),
@@ -225,7 +222,7 @@ def _build_model(self) -> pd.DataFrame:
225222
category=cat, target_category_column=cat_target, forecast=output_i
226223
)
227224

228-
self.model_parameters[cat_target] = {
225+
self.model_parameters[utils.convert_target(cat_target, self.original_target_column)] = {
229226
"framework": SupportedModels.AutoTS,
230227
**params,
231228
}
@@ -257,15 +254,28 @@ def _generate_report(self) -> tuple:
257254
"## Forecast Overview \n"
258255
"These plots show your forecast in the context of historical data."
259256
)
257+
258+
# Default title generated by autots has target_col in it. Modified function to get rid of it.
259+
def get_title(idx, target):
260+
from autots.models.base import extract_single_series_from_horz
261+
title_prelim = extract_single_series_from_horz(
262+
self.models.df_wide_numeric.columns[idx],
263+
model_name=self.prediction.model_name,
264+
model_parameters=self.prediction.model_parameters,
265+
)[0:80]
266+
return f"{utils.convert_target(target, self.original_target_column)} with model {title_prelim}"
267+
260268
sec_1 = utils._select_plot_list(
261-
lambda idx, *args: self.prediction.plot(
269+
lambda idx, target, *args: self.prediction.plot(
262270
self.models.df_wide_numeric,
263271
series=self.models.df_wide_numeric.columns[idx],
264272
start_date=self.models.df_wide_numeric.reset_index()[
265273
self.spec.datetime_column.name
266274
].min(),
275+
title=get_title(idx, target)
267276
),
268277
target_columns=self.target_columns,
278+
original_target_column=self.original_target_column
269279
)
270280

271281
# Section 2: AutoTS Model Parameters
@@ -323,8 +333,8 @@ def _generate_report(self) -> tuple:
323333
blocks = [
324334
dp.DataTable(
325335
local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
326-
label=s_id,
327-
)
336+
label=utils.convert_target(s_id, self.original_target_column),
337+
)
328338
for s_id, local_ex_df in self.local_explanation.items()
329339
]
330340
local_explanation_section = (
@@ -364,7 +374,7 @@ def _generate_train_metrics(self) -> pd.DataFrame:
364374
Generate Training Metrics when fitted data is not available.
365375
The method that needs to be implemented on the particular model level.
366376
367-
metrics Sales_Store 1
377+
metrics Store 1
368378
sMAPE 26.19
369379
MAPE 2.96E+18
370380
RMSE 2014.192531
@@ -375,4 +385,7 @@ def _generate_train_metrics(self) -> pd.DataFrame:
375385
scores = pd.DataFrame(
376386
self.models.best_model_per_series_score(), columns=["AutoTS Score"]
377387
).T
378-
return pd.concat([mapes, scores])
388+
df = pd.concat([mapes, scores])
389+
new_column_names = {old_name: utils.convert_target(old_name, self.original_target_column)
390+
for old_name in df.columns}
391+
return df.rename(columns=new_column_names)

ads/opctl/operator/lowcode/forecast/model/base_model.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@ def generate_report(self):
106106
self.datasets,
107107
self.forecast_output,
108108
self.spec.datetime_column.name,
109+
self.original_target_column,
109110
target_col=self.forecast_col_name,
110111
)
111112
else:
@@ -126,6 +127,7 @@ def generate_report(self):
126127
target_columns=self.target_columns,
127128
test_filename=self.spec.test_data.url,
128129
output=self.forecast_output,
130+
original_target_column=self.original_target_column,
129131
target_col=self.forecast_col_name,
130132
elapsed_time=elapsed_time,
131133
)
@@ -145,12 +147,13 @@ def generate_report(self):
145147

146148
title_text = dp.Text("# Forecast Report")
147149

148-
md_columns = " * ".join([f"{x} \n" for x in self.target_columns])
150+
md_columns = " * ".join([f"{utils.convert_target(x,self.original_target_column)} \n"
151+
for x in self.target_columns])
149152
first_10_rows_blocks = [
150153
dp.DataTable(
151154
df.head(10).rename({col: self.spec.target_column}, axis=1),
152155
caption="Start",
153-
label=col,
156+
label=utils.convert_target(col, self.original_target_column),
154157
)
155158
for col, df in self.full_data_dict.items()
156159
]
@@ -159,7 +162,7 @@ def generate_report(self):
159162
dp.DataTable(
160163
df.tail(10).rename({col: self.spec.target_column}, axis=1),
161164
caption="End",
162-
label=col,
165+
label=utils.convert_target(col, self.original_target_column),
163166
)
164167
for col, df in self.full_data_dict.items()
165168
]
@@ -168,7 +171,7 @@ def generate_report(self):
168171
dp.DataTable(
169172
df.rename({col: self.spec.target_column}, axis=1).describe(),
170173
caption="Summary Statistics",
171-
label=col,
174+
label=utils.convert_target(col, self.original_target_column),
172175
)
173176
for col, df in self.full_data_dict.items()
174177
]
@@ -224,7 +227,7 @@ def generate_report(self):
224227
),
225228
dp.Text(
226229
"The following report compares a variety of metrics and plots "
227-
f"for your target columns: \n {md_columns}.\n",
230+
f"for your target columns: \n * {md_columns}.\n",
228231
label="Target Columns",
229232
),
230233
]
@@ -255,6 +258,7 @@ def generate_report(self):
255258
forecast_sec = utils.get_forecast_plots(
256259
self.forecast_output,
257260
self.target_columns,
261+
self.original_target_column,
258262
horizon=self.spec.horizon,
259263
test_data=test_data,
260264
ci_interval_width=self.spec.confidence_interval_width,
@@ -281,7 +285,7 @@ def generate_report(self):
281285
)
282286

283287
def _test_evaluate_metrics(
284-
self, target_columns, test_filename, output, target_col="yhat", elapsed_time=0
288+
self, target_columns, test_filename, output, original_target_column, target_col="yhat", elapsed_time=0
285289
):
286290
total_metrics = pd.DataFrame()
287291
summary_metrics = pd.DataFrame()
@@ -336,7 +340,7 @@ def _test_evaluate_metrics(
336340
metrics_df = utils._build_metrics_df(
337341
y_true=y_true[-self.spec.horizon:],
338342
y_pred=y_pred[-self.spec.horizon:],
339-
column_name=target_column_i,
343+
column_name=utils.convert_target(target_column_i, original_target_column),
340344
)
341345
total_metrics = pd.concat([total_metrics, metrics_df], axis=1)
342346
else:
@@ -685,7 +689,7 @@ def explain_model(self, datetime_col_name, explain_predict_fn) -> dict:
685689
f"No explanations generated. Ensure that additional data has been provided."
686690
)
687691
else:
688-
self.global_explanation[series_id] = dict(
692+
self.global_explanation[utils.convert_target(series_id, self.original_target_column)] = dict(
689693
zip(
690694
data_trimmed.columns[1:],
691695
np.average(np.absolute(kernel_explnr_vals[:, 1:]), axis=0),
@@ -734,4 +738,4 @@ def local_explainer(self, kernel_explainer, series_id, datetime_col_name) -> Non
734738
["series_id", self.spec.target_column], axis=1, inplace=True
735739
)
736740

737-
self.local_explanation[series_id] = local_kernel_explnr_df
741+
self.local_explanation[utils.convert_target(series_id, self.original_target_column)] = local_kernel_explnr_df

ads/opctl/operator/lowcode/forecast/model/neuralprophet.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,6 @@ def _load_model(self):
8181
except:
8282
logger.info("model.pkl/trainer.pkl is not present")
8383

84-
8584
def _train_model(self, i, target, df):
8685

8786
try:
@@ -236,7 +235,7 @@ def objective(trial):
236235
self.models[target] = model
237236
self.trainers[target] = model.trainer
238237

239-
self.model_parameters[target] = {
238+
self.model_parameters[utils.convert_target(target, self.original_target_column)] = {
240239
"framework": SupportedModels.NeuralProphet,
241240
"config": model.config,
242241
"config_trend": model.config_trend,
@@ -259,7 +258,7 @@ def objective(trial):
259258
"highlight_forecast_step_n": model.highlight_forecast_step_n,
260259
"true_ar_weights": model.true_ar_weights,
261260
}
262-
261+
263262
logger.debug("===========Done===========")
264263
except Exception as e:
265264
self.errors_dict[target] = {"model_name": self.spec.model, "error": str(e)}
@@ -286,7 +285,6 @@ def _build_model(self) -> pd.DataFrame:
286285
if self.loaded_trainers is not None:
287286
self.trainers = self.loaded_trainers
288287

289-
290288
# Merge the outputs from each model into 1 df with all outputs by target and category
291289
col = self.original_target_column
292290
output_col = pd.DataFrame()
@@ -349,18 +347,21 @@ def _generate_report(self):
349347
sec1 = utils._select_plot_list(
350348
lambda idx, target, *args: self.models[target].plot(self.outputs[target]),
351349
target_columns=self.target_columns,
350+
original_target_column=self.original_target_column
352351
)
353352

354353
sec2_text = dp.Text(f"## Forecast Broken Down by Trend Component")
355354
sec2 = utils._select_plot_list(
356355
lambda idx, target, *args: self.models[target].plot_components(self.outputs[target]),
357356
target_columns=self.target_columns,
357+
original_target_column=self.original_target_column
358358
)
359359

360360
sec3_text = dp.Text(f"## Forecast Parameter Plots")
361361
sec3 = utils._select_plot_list(
362362
lambda idx, target, *args: self.models[target].plot_parameters(),
363363
target_columns=self.target_columns,
364+
original_target_column=self.original_target_column
364365
)
365366

366367
sec5_text = dp.Text(f"## Neural Prophet Model Parameters")
@@ -370,7 +371,7 @@ def _generate_report(self):
370371
pd.Series(
371372
m.state_dict(),
372373
index=m.state_dict().keys(),
373-
name=target,
374+
name=utils.convert_target(target, self.original_target_column),
374375
)
375376
)
376377
all_model_states = pd.concat(model_states, axis=1)
@@ -406,7 +407,7 @@ def _generate_report(self):
406407
global_explanation_df = pd.DataFrame(self.global_explanation)
407408

408409
self.formatted_global_explanation = (
409-
global_explanation_df / global_explanation_df.sum(axis=0) * 100
410+
global_explanation_df / global_explanation_df.sum(axis=0) * 100
410411
)
411412

412413
# Create a markdown section for the global explainability
@@ -428,7 +429,7 @@ def _generate_report(self):
428429
blocks = [
429430
dp.DataTable(
430431
local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
431-
label=s_id,
432+
label=utils.convert_target(s_id, self.original_target_column),
432433
)
433434
for s_id, local_ex_df in self.local_explanation.items()
434435
]

ads/opctl/operator/lowcode/forecast/model/prophet.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -118,8 +118,8 @@ def objective(trial):
118118
)
119119

120120
# Manual workaround because pandas 1.x dropped support for M and Y
121-
interval = self.spec.horizon.interval
122-
unit = self.spec.horizon.interval_unit
121+
interval = self.spec.horizon
122+
unit = self.spec.freq.split('-')[0] if self.spec.freq else None
123123
if unit == "M":
124124
unit = "D"
125125
interval = interval * 30.5
@@ -207,7 +207,7 @@ def objective(trial):
207207
for param in ["history", "history_dates", "stan_fit"]:
208208
if param in params:
209209
params.pop(param)
210-
self.model_parameters[target] = {
210+
self.model_parameters[utils.convert_target(target, self.original_target_column)] = {
211211
"framework": SupportedModels.Prophet,
212212
**params,
213213
}
@@ -293,12 +293,14 @@ def _generate_report(self):
293293
self.outputs[target], include_legend=True
294294
),
295295
target_columns=self.target_columns,
296+
original_target_column=self.original_target_column
296297
)
297298

298299
sec2_text = dp.Text(f"## Forecast Broken Down by Trend Component")
299300
sec2 = utils._select_plot_list(
300301
lambda idx, target, *args: self.models[target].plot_components(self.outputs[target]),
301302
target_columns=self.target_columns,
303+
original_target_column=self.original_target_column
302304
)
303305

304306
sec3_text = dp.Text(f"## Forecast Changepoints")
@@ -313,7 +315,9 @@ def _generate_report(self):
313315
for idx in range(len(self.target_columns))
314316
]
315317
sec3 = utils._select_plot_list(
316-
lambda idx, *args: sec3_figs[idx], target_columns=self.target_columns
318+
lambda idx, *args: sec3_figs[idx],
319+
target_columns=self.target_columns,
320+
original_target_column=self.original_target_column
317321
)
318322

319323
all_sections = [sec1_text, sec1, sec2_text, sec2, sec3_text, sec3]
@@ -374,7 +378,7 @@ def _generate_report(self):
374378
blocks = [
375379
dp.DataTable(
376380
local_ex_df.div(local_ex_df.abs().sum(axis=1), axis=0) * 100,
377-
label=s_id,
381+
label=utils.convert_target(s_id,self.original_target_column),
378382
)
379383
for s_id, local_ex_df in self.local_explanation.items()
380384
]

0 commit comments

Comments
 (0)