Skip to content

Commit 1cec8e8

Browse files
committed
relax error threshold
1 parent 7a2210c commit 1cec8e8

File tree

10 files changed

+157
-96
lines changed

10 files changed

+157
-96
lines changed

ads/opctl/operator/lowcode/common/transformations.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -59,9 +59,15 @@ def run(self, data):
5959
clean_df = self._set_multi_index(clean_df)
6060

6161
if self.name == "historical_data":
62-
clean_df = self._missing_value_imputation_hist(clean_df)
62+
try:
63+
clean_df = self._missing_value_imputation_hist(clean_df)
64+
except Exception as e:
65+
logger.debug(f"Missing value imputation failed with {e.args}")
6366
if self.preprocessing:
64-
clean_df = self._outlier_treatment(clean_df)
67+
try:
68+
clean_df = self._outlier_treatment(clean_df)
69+
except Exception as e:
70+
logger.debug(f"Outlier Treatment failed with {e.args}")
6571
else:
6672
logger.debug("Skipping outlier treatment as preprocessing is disabled")
6773
elif self.name == "additional_data":
@@ -89,7 +95,7 @@ def _format_datetime_col(self, df):
8995
)
9096
except:
9197
raise InvalidParameterError(
92-
f"Unable to determine the datetime type for column: {self.dt_column_name} in dataset: {self.name}. Please specify the format explicitly. (For example adding 'format: %d/%m/%Y' underneath 'name: {self.dt_column_name}' in the datetime_column section of the yaml file. For reference, here is the first datetime given: {df[self.dt_column_name].values[0]}"
98+
f"Unable to determine the datetime type for column: {self.dt_column_name} in dataset: {self.name}. Please specify the format explicitly. (For example adding 'format: %d/%m/%Y' underneath 'name: {self.dt_column_name}' in the datetime_column section of the yaml file if you haven't already. For reference, here is the first datetime given: {df[self.dt_column_name].values[0]}"
9399
)
94100
return df
95101

ads/opctl/operator/lowcode/forecast/__main__.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,18 @@ def operate(operator_config: ForecastOperatorConfig) -> None:
2424
from .model.factory import ForecastOperatorModelFactory
2525

2626
datasets = ForecastDatasets(operator_config)
27-
ForecastOperatorModelFactory.get_model(operator_config, datasets).generate_report()
27+
try:
28+
ForecastOperatorModelFactory.get_model(
29+
operator_config, datasets
30+
).generate_report()
31+
except Exception as e:
32+
logger.debug(
33+
f"Failed to forecast with error {e.args}. Trying again with model `prophet`."
34+
)
35+
operator_config.spec.model = "prophet"
36+
ForecastOperatorModelFactory.get_model(
37+
operator_config, datasets
38+
).generate_report()
2839

2940

3041
def verify(spec: Dict, **kwargs: Dict) -> bool:

ads/opctl/operator/lowcode/forecast/model/arima.py

Lines changed: 57 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -61,66 +61,65 @@ def _train_model(self, i, s_id, df, model_kwargs):
6161
df: pd.DataFrame
6262
The dataframe containing the target data
6363
"""
64-
# try:
65-
target = self.original_target_column
66-
self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
67-
68-
# format the dataframe for this target. Dropping NA on target[df] will remove all future data
69-
data = self.preprocess(df, s_id)
70-
data_i = self.drop_horizon(data)
71-
72-
# Split data into X and y for arima tune method
73-
y = data_i[target]
74-
X_in = data_i.drop(target, axis=1) if len(data_i.columns) > 1 else None
75-
X_pred = self.get_horizon(data).drop(target, axis=1)
76-
77-
if self.loaded_models is not None:
78-
model = self.loaded_models[s_id]
79-
else:
80-
# Build and fit model
81-
model = pm.auto_arima(y=y, X=X_in, **model_kwargs)
82-
83-
fitted_values = model.predict_in_sample(X=X_in).values
84-
85-
# Predict and format forecast
86-
yhat, conf_int = model.predict(
87-
n_periods=self.spec.horizon,
88-
X=X_pred,
89-
return_conf_int=True,
90-
alpha=model_kwargs["alpha"],
91-
)
92-
yhat_clean = pd.DataFrame(yhat, index=yhat.index, columns=["yhat"])
64+
try:
65+
target = self.original_target_column
66+
self.forecast_output.init_series_output(series_id=s_id, data_at_series=df)
67+
68+
# format the dataframe for this target. Dropping NA on target[df] will remove all future data
69+
data = self.preprocess(df, s_id)
70+
data_i = self.drop_horizon(data)
71+
72+
# Split data into X and y for arima tune method
73+
y = data_i[target]
74+
X_in = data_i.drop(target, axis=1) if len(data_i.columns) > 1 else None
75+
X_pred = self.get_horizon(data).drop(target, axis=1)
76+
77+
if self.loaded_models is not None:
78+
model = self.loaded_models[s_id]
79+
else:
80+
# Build and fit model
81+
model = pm.auto_arima(y=y, X=X_in, **model_kwargs)
82+
83+
fitted_values = model.predict_in_sample(X=X_in).values
84+
85+
# Predict and format forecast
86+
yhat, conf_int = model.predict(
87+
n_periods=self.spec.horizon,
88+
X=X_pred,
89+
return_conf_int=True,
90+
alpha=model_kwargs["alpha"],
91+
)
92+
yhat_clean = pd.DataFrame(yhat, index=yhat.index, columns=["yhat"])
9393

94-
conf_int_clean = pd.DataFrame(
95-
conf_int, index=yhat.index, columns=["yhat_lower", "yhat_upper"]
96-
)
97-
forecast = pd.concat([yhat_clean, conf_int_clean], axis=1)
98-
logger.debug(f"-----------------Model {i}----------------------")
99-
logger.debug(forecast[["yhat", "yhat_lower", "yhat_upper"]].tail())
100-
101-
self.forecast_output.populate_series_output(
102-
series_id=s_id,
103-
fit_val=fitted_values,
104-
forecast_val=self.get_horizon(forecast["yhat"]).values,
105-
upper_bound=self.get_horizon(forecast["yhat_upper"]).values,
106-
lower_bound=self.get_horizon(forecast["yhat_lower"]).values,
107-
)
94+
conf_int_clean = pd.DataFrame(
95+
conf_int, index=yhat.index, columns=["yhat_lower", "yhat_upper"]
96+
)
97+
forecast = pd.concat([yhat_clean, conf_int_clean], axis=1)
98+
logger.debug(f"-----------------Model {i}----------------------")
99+
logger.debug(forecast[["yhat", "yhat_lower", "yhat_upper"]].tail())
100+
101+
self.forecast_output.populate_series_output(
102+
series_id=s_id,
103+
fit_val=fitted_values,
104+
forecast_val=self.get_horizon(forecast["yhat"]).values,
105+
upper_bound=self.get_horizon(forecast["yhat_upper"]).values,
106+
lower_bound=self.get_horizon(forecast["yhat_lower"]).values,
107+
)
108+
109+
self.models[s_id] = model
110+
111+
params = vars(model).copy()
112+
for param in ["arima_res_", "endog_index_"]:
113+
if param in params:
114+
params.pop(param)
115+
self.model_parameters[s_id] = {
116+
"framework": SupportedModels.Arima,
117+
**params,
118+
}
108119

109-
self.models[s_id] = model
110-
111-
params = vars(model).copy()
112-
for param in ["arima_res_", "endog_index_"]:
113-
if param in params:
114-
params.pop(param)
115-
self.model_parameters[s_id] = {
116-
"framework": SupportedModels.Arima,
117-
**params,
118-
}
119-
120-
logger.debug("===========Done===========")
121-
# except Exception as e:
122-
# self.errors_dict[s_id] = {"model_name": self.spec.model, "error": str(e)}
123-
# raise
120+
logger.debug("===========Done===========")
121+
except Exception as e:
122+
self.errors_dict[s_id] = {"model_name": self.spec.model, "error": str(e)}
124123

125124
def _build_model(self) -> pd.DataFrame:
126125
full_data_dict = self.datasets.get_data_by_series()

ads/opctl/operator/lowcode/forecast/model/automlx.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -45,11 +45,11 @@ def set_kwargs(self):
4545
model_kwargs_cleaned.get("score_metric", AUTOMLX_DEFAULT_SCORE_METRIC),
4646
)
4747
model_kwargs_cleaned.pop("task", None)
48-
time_budget = model_kwargs_cleaned.pop("time_budget", 0)
48+
time_budget = model_kwargs_cleaned.pop("time_budget", None)
4949
model_kwargs_cleaned[
5050
"preprocessing"
5151
] = self.spec.preprocessing or model_kwargs_cleaned.get("preprocessing", True)
52-
return model_kwargs_cleaned
52+
return model_kwargs_cleaned, time_budget
5353

5454
def preprocess(self, data, series_id=None):
5555
return data.set_index(self.spec.datetime_column.name)
@@ -91,8 +91,7 @@ def _build_model(self) -> pd.DataFrame:
9191
)
9292

9393
# Clean up kwargs for pass through
94-
model_kwargs_cleaned = self.set_kwargs()
95-
time_budget = model_kwargs_cleaned.pop("time_budget", -1)
94+
model_kwargs_cleaned, time_budget = self.set_kwargs()
9695

9796
for i, (s_id, df) in enumerate(full_data_dict.items()):
9897
try:
@@ -170,7 +169,6 @@ def _build_model(self) -> pd.DataFrame:
170169
"model_name": self.spec.model,
171170
"error": str(e),
172171
}
173-
raise
174172

175173
logger.debug("===========Forecast Generated===========")
176174

@@ -325,13 +323,16 @@ def _custom_predict_fn(
325323
if row.index[0] > last_train_date:
326324
X_new = horizon_data.copy()
327325
X_new.loc[row.index[0]] = row.iloc[0]
328-
row_i = model.forecast(X=X_new, periods=self.spec.horizon)[
329-
[target_col]
330-
].loc[row.index[0]]
326+
row_i = (
327+
model.forecast(X=X_new, periods=self.spec.horizon)[[target_col]]
328+
.loc[row.index[0]]
329+
.values[0]
330+
)
331331
else:
332-
row_i = model.predict(X=row)
332+
row_i = model.predict(X=row).values[0][0]
333333
rows.append(row_i)
334-
return pd.concat(rows)[target_col].reset_index(drop=True)
334+
ret = np.asarray(rows).flatten()
335+
return ret
335336

336337
return _custom_predict_fn
337338

ads/opctl/operator/lowcode/forecast/model/base_model.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,6 @@ def generate_report(self):
131131
except Exception as e:
132132
logger.warn("Unable to generate Test Metrics.")
133133
logger.debug(f"Full Traceback: {traceback.format_exc()}")
134-
raise e # TODO remove
135134
report_sections = []
136135

137136
if self.spec.generate_report:
@@ -278,7 +277,10 @@ def generate_report(self):
278277
test_data=test_data,
279278
ci_interval_width=self.spec.confidence_interval_width,
280279
)
281-
forecast_plots = [forecast_text, series_subtext, forecast_sec]
280+
if series_name is not None and len(self.datasets.list_series_ids()) > 1:
281+
forecast_plots = [forecast_text, series_subtext, forecast_sec]
282+
else:
283+
forecast_plots = [forecast_text, forecast_sec]
282284

283285
yaml_appendix_title = dp.Text(f"## Reference: YAML File")
284286
yaml_appendix = dp.Code(code=self.config.to_yaml(), language="yaml")
@@ -529,7 +531,7 @@ def _save_report(
529531
logger.warn(
530532
"Unable to generate explanations for this model type or for this dataset."
531533
)
532-
raise
534+
logger.debug(f"Got error: {e.args}")
533535

534536
if self.spec.generate_model_parameters:
535537
# model params
@@ -662,6 +664,14 @@ def explain_model(self):
662664
)
663665
kernel_explnr_vals = kernel_explnr.shap_values(data_trimmed)
664666

667+
exp_end_time = time.time()
668+
global_ex_time = global_ex_time + exp_end_time - exp_start_time
669+
670+
self.local_explainer(
671+
kernel_explnr, series_id=s_id, datetime_col_name=datetime_col_name
672+
)
673+
local_ex_time = local_ex_time + time.time() - exp_end_time
674+
665675
if not len(kernel_explnr_vals):
666676
logger.warn(
667677
f"No explanations generated. Ensure that additional data has been provided."
@@ -673,13 +683,7 @@ def explain_model(self):
673683
np.average(np.absolute(kernel_explnr_vals[:, 1:]), axis=0),
674684
)
675685
)
676-
exp_end_time = time.time()
677-
global_ex_time = global_ex_time + exp_end_time - exp_start_time
678686

679-
self.local_explainer(
680-
kernel_explnr, series_id=s_id, datetime_col_name=datetime_col_name
681-
)
682-
local_ex_time = local_ex_time + time.time() - exp_end_time
683687
logger.info(
684688
"Global explanations generation completed in %s seconds", global_ex_time
685689
)

ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,16 @@ class AdditionalData(AbstractData):
6565
def __init__(self, spec, historical_data):
6666
if spec.additional_data is not None:
6767
super().__init__(spec=spec, name="additional_data")
68+
add_dates = self.data.index.get_level_values(0).unique().tolist()
69+
add_dates.sort()
70+
if historical_data.get_max_time() > add_dates[-spec.horizon]:
71+
raise DataMismatchError(
72+
f"The Historical Data ends on {historical_data.get_max_time()}. The additional data horizon starts on {add_dates[-spec.horizon]}. The horizon should have exactly {spec.horizon} dates after the Hisotrical at a frequency of {historical_data.freq}"
73+
)
74+
elif historical_data.get_max_time() != add_dates[-(spec.horizon + 1)]:
75+
raise DataMismatchError(
76+
f"The Additional Data must be present for all historical data and the entire horizon. The Historical Data ends on {historical_data.get_max_time()}. The additonal data horizon starts after {add_dates[-(spec.horizon+1)]}. These should be the same date."
77+
)
6878
else:
6979
self.name = "additional_data"
7080
self.data = None

ads/opctl/operator/lowcode/forecast/model/neuralprophet.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,6 @@ def _train_model(self, i, s_id, df, model_kwargs):
209209
logger.debug("===========Done===========")
210210
except Exception as e:
211211
self.errors_dict[s_id] = {"model_name": self.spec.model, "error": str(e)}
212-
raise e
213212

214213
def _build_model(self) -> pd.DataFrame:
215214
full_data_dict = self.datasets.get_data_by_series()
@@ -402,7 +401,6 @@ def _generate_report(self):
402401
# Do not fail the whole run due to explanations failure
403402
logger.warn(f"Failed to generate Explanations with error: {e}.")
404403
logger.debug(f"Full Traceback: {traceback.format_exc()}")
405-
raise
406404

407405
model_description = dp.Text(
408406
"NeuralProphet is an easy to learn framework for interpretable time "

ads/opctl/operator/lowcode/forecast/model/prophet.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,6 @@ def _train_model(self, i, series_id, df, model_kwargs):
131131
"model_name": self.spec.model,
132132
"error": str(e),
133133
}
134-
raise
135134

136135
def _build_model(self) -> pd.DataFrame:
137136
from prophet import Prophet

ads/opctl/operator/lowcode/forecast/utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,6 @@ def evaluate_train_metrics(output, metrics_col_name=None):
250250
f"Failed to generate training metrics for target_series: {s_id}"
251251
)
252252
logger.debug(f"Recieved Error Statement: {e}")
253-
raise
254253
return total_metrics
255254

256255

0 commit comments

Comments
 (0)