Skip to content

Commit 02267d5

Browse files
committed
test case added for disabling outlier treatment
1 parent 1db69ab commit 02267d5

File tree

3 files changed

+62
-13
lines changed

3 files changed

+62
-13
lines changed

ads/opctl/operator/lowcode/forecast/operator_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class DateTimeColumn(DataClassSerializable):
3131

3232
@dataclass(repr=True)
3333
class PreprocessingSteps(DataClassSerializable):
34-
"""Class representing operator specification preprocessing details."""
34+
"""Class representing preprocessing steps for operator."""
3535

3636
missing_value_imputation: bool = True
3737
outlier_treatment: bool = True

ads/opctl/operator/lowcode/forecast/schema.yaml

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -295,14 +295,18 @@ spec:
295295
default: true
296296
meta:
297297
description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
298-
missing_value_imputation:
299-
type: boolean
300-
required: false
301-
default: true
302-
outlier_detection:
303-
type: boolean
298+
steps:
299+
type: dict
304300
required: false
305-
default: true
301+
schema:
302+
missing_value_imputation:
303+
type: boolean
304+
required: false
305+
default: true
306+
outlier_treatment:
307+
type: boolean
308+
required: false
309+
default: true
306310

307311
generate_explanations:
308312
type: boolean

tests/operators/forecast/test_errors.py

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
ForecastInputDataError,
2727
)
2828
from ads.opctl.operator.cmd import run
29-
29+
import math
3030

3131
NUM_ROWS = 1000
3232
NUM_SERIES = 10
@@ -172,8 +172,9 @@ def run_yaml(tmpdirname, yaml_i, output_data_path):
172172
run(yaml_i, backend="operator.local", debug=True)
173173
subprocess.run(f"ls -a {output_data_path}", shell=True)
174174

175-
test_metrics = pd.read_csv(f"{tmpdirname}/results/test_metrics.csv")
176-
print(test_metrics)
175+
if 'test_data' in yaml_i['spec']:
176+
test_metrics = pd.read_csv(f"{tmpdirname}/results/test_metrics.csv")
177+
print(test_metrics)
177178
train_metrics = pd.read_csv(f"{tmpdirname}/results/metrics.csv")
178179
print(train_metrics)
179180

@@ -205,7 +206,8 @@ def populate_yaml(
205206
yaml_i["spec"]["datetime_column"]["name"] = "Date"
206207
yaml_i["spec"]["target_category_columns"] = ["Store"]
207208
yaml_i["spec"]["horizon"] = HORIZON
208-
yaml_i["spec"]["preprocessing"] = preprocessing
209+
if preprocessing:
210+
yaml_i["spec"]["preprocessing"] = preprocessing
209211
if generate_train_metrics:
210212
yaml_i["spec"]["generate_metrics"] = generate_train_metrics
211213
if model == "autots":
@@ -431,6 +433,49 @@ def test_invalid_dates(operator_setup, model):
431433
)
432434

433435

436+
def test_disabling_outlier_treatment(operator_setup):
    """Check that the ``outlier_treatment`` preprocessing step can be switched off.

    Two outlier values are written into a single-series history. The arima
    operator is run twice against the same config: first without a
    ``preprocessing`` section, then with ``outlier_treatment`` set to False.
    The ``input_value`` column of the resulting ``forecast.csv`` must not
    contain the outliers after the first run and must contain both of them
    after the second run.
    """
    tmpdirname = operator_setup
    n_rows = 100
    outliers = [1000, -800]

    # Single-series history: datetime column next to the target column.
    history = pd.concat(
        [
            HISTORICAL_DATETIME_COL[: n_rows - HORIZON],
            TARGET_COL[: n_rows - HORIZON],
        ],
        axis=1,
    )
    # Plant the outliers at fixed row labels.
    for row_label, outlier in zip((40, 75), outliers):
        history.at[row_label, 'Sales'] = outlier

    historical_data_path, _, _ = setup_artificial_data(tmpdirname, history)

    yaml_i, output_data_path = populate_yaml(
        tmpdirname=tmpdirname,
        model="arima",
        historical_data_path=historical_data_path,
    )
    # This scenario uses neither series categories nor additional data.
    for unused_key in ("target_category_columns", "additional_data"):
        yaml_i["spec"].pop(unused_key)

    # First run: no preprocessing section in the spec, so the operator's
    # default pipeline applies and the outliers are expected to be treated.
    run_yaml(tmpdirname=tmpdirname, yaml_i=yaml_i, output_data_path=output_data_path)
    treated_forecast = pd.read_csv(f"{tmpdirname}/results/forecast.csv")
    treated_inputs = set(treated_forecast['input_value'])
    assert treated_inputs.isdisjoint(outliers), "forecast file should not contain any outliers"

    # Second run: keep imputation enabled, switch outlier treatment off.
    yaml_i["spec"]["preprocessing"] = {
        "enabled": True,
        "steps": {"missing_value_imputation": True, "outlier_treatment": False},
    }
    run_yaml(tmpdirname=tmpdirname, yaml_i=yaml_i, output_data_path=output_data_path)
    untreated_forecast = pd.read_csv(f"{tmpdirname}/results/forecast.csv")
    untreated_inputs = set(untreated_forecast['input_value'])
    assert untreated_inputs.issuperset(outliers), "forecast file should contain all the outliers"
478+
434479
@pytest.mark.parametrize("model", MODELS)
435480
def test_2_series(operator_setup, model):
436481
# Test w and w/o add data
@@ -456,7 +501,7 @@ def split_df(df):
456501
historical_data_path, additional_data_path, test_data_path = setup_artificial_data(
457502
tmpdirname, hist_data, add_data, test_data
458503
)
459-
preprocessing_steps = {"missing_value_imputation": True, "outlier_detection": False}
504+
preprocessing_steps = {"missing_value_imputation": True, "outlier_treatment": False}
460505
yaml_i, output_data_path = populate_yaml(
461506
tmpdirname=tmpdirname,
462507
model=model,

0 commit comments

Comments
 (0)